For the following analyses we will require the use of a number of different R packages. We can use the following code to quickly load in the packages and install any packages not previously installed in the R console.
if (!require("pacman")) install.packages("pacman")
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis", "ropensci/rnaturalearthhires", "KarstensLab/microshades")
pacman::p_load("cowplot", "car", "ggrepel", "ggspatial", "paletteer", "patchwork", "rgdal", "rnaturalearth", "sf", "Hmisc", "MCMC.OTU", "pairwiseAdonis", "RColorBrewer", "Redmonder", "flextable", "lubridate", "officer", "adegenet", "dendextend", "gdata", "ggdendro", "hierfstat", "kableExtra", "poppr", "reshape2", "StAMPP", "vcfR", "vegan", "boa", "magick", "rgeos", "sdmpredictors", "ggcorrplot", "tidyverse", "TeachingDemos", "LaplacesDemon", "adespatial", "ggnewscale", "ggbeeswarm", "multcomp", "rstatix", "R.utils")
options("scipen" = 10)
load("fknmsSint.RData")
Making color palettes to use throughout all plots
flPal = c(paletteer_d("vapoRwave::jazzCup")[c(1, 3:4)], "#4B31B3")
boundPal = c("gray30", paletteer_d("vapoRwave::vapoRwave")[10])
teal = "#089099FF"
purple = paletteer_d("vapoRwave::vapoRwave")[10]
kColPal = c(paletteer_d("rcartocolor::BluYl")[c(7, 5, 3)], "#f5e97a", "lavender")
profPal = rev(c(microshades_palette("micro_green", 5), microshades_palette("micro_cvd_turquoise", 5), microshades_palette("micro_cvd_orange", 3),microshades_palette("micro_cvd_purple", 1, lightest = F), microshades_palette("micro_purple", 5)))
colPalZoox = c("#807dba", "#F09163", "#48C9B0", "#FEEDA0")
fknmsSites = read.csv("../data/stephanocoeniaMetaData.csv", header = TRUE)
fknmsSites$depthZone = factor(fknmsSites$depthZone)
fknmsSites$depthZone = factor(fknmsSites$depthZone, levels = levels(fknmsSites$depthZone)[c(2,1)])
fknmsSites$site = factor(fknmsSites$site)
fknmsSites$site = factor(fknmsSites$site, levels = levels(fknmsSites$site)[c(4, 1, 3, 2)])
fknmsSites$date = mdy(fknmsSites$date) %>% format("%d %b %Y")
fknmsPops = fknmsSites %>% group_by(site) %>% summarise(latDD = mean(latDD), longDD = mean(longDD), n = n()) %>% droplevels()
fknmsSampleSites = fknmsSites %>% group_by(site, siteID, depthZone) %>% summarise(latDD = min(latDD), longDD = min(longDD))
## `summarise()` has grouped output by 'site', 'siteID'. You can override using the
## `.groups` argument.
fknmsBounds = read.csv("../data/shp/fknmsSPA.csv", header = TRUE)
states = st_as_sf(ne_states(country = c("United States of America")), scale = "count", crs = 4326) %>% filter(name_en %in% c("Florida", "Georgia", "Alabama"))
countries = st_as_sf(ne_countries(country = c("Cuba", "Mexico", "The Bahamas", "Bermuda"), scale = "Large"), crs = 4326)
bahamas = read_sf("../data/shp/bahamasShoreline.shp") %>% st_transform(crs = 4326)
cuba = read_sf("../data/shp/cubaShoreline.shp") %>% st_transform(crs = 4326)
florida = read_sf("../data/shp/floridaShoreline.shp") %>% st_transform(crs = 4326)
bathy = read_sf("../data/shp/flBathy.shp") %>% st_transform(crs = 4326) %>% subset(subset = DATASET %in% c("fl_shelf", "fl_coast"))
tortugasBathy = read_sf("../data/shp/tortugasBathy.shp") %>% st_transform(crs = 4326)
Next we build a hi-res polygon of FL with the study site marked and a
zoomed in map of the colony locations. We use ggspatial
to
add a north arrow and scale bar to the main map.
floridaMap = ggplot() +
geom_polygon(data = fknmsBounds[fknmsBounds$type == "Sanctuary",], aes(x = long, y = lat, group = location), alpha = 0.1, fill = "black", color = NA) +
geom_polygon(data = fknmsBounds[fknmsBounds$location == "FKNMS2",], aes(x = long, y = lat), fill = "aliceblue", color = NA) +
geom_polygon(data = fknmsBounds, aes(x = long, y = lat, color = type, group = location), fill = NA) +
scale_fill_manual(values = flPal, name = "Site") +
scale_color_manual(values = boundPal, name = "Boundaries", labels = c("FKNMS", "SPA")) +
geom_point(data = fknmsSites, aes(x = longDD, y = latDD, shape = depthZone), size = 0) +
scale_shape_manual(values = c(24, 25), name = "Depth") +
geom_sf(data = florida, fill = "white", linewidth = 0.15) +
geom_sf(data = cuba, fill = "white", linewidth = 0.15) +
geom_sf(data = bahamas, fill = "white", linewidth = 0.15) +
geom_segment(aes(x = -80.1, y = 25.3, xend = -78.825, yend = 24.44), linewidth = 0.25) +
geom_segment(aes(x = -80.4, y = 25, xend = -80.27, yend = 23), linewidth = 0.25) +
geom_segment(aes(x = -81.75, y = 24.7, xend = -82.22, yend = 24.28), linewidth = 0.25) +
geom_segment(aes(x = -81.45, y = 24.7, xend = -80.78, yend = 24.28), linewidth = 0.25) +
geom_segment(aes(x = -83.25, y = 24.75, xend = -84.183, yend = 24.28), linewidth = 0.25) +
geom_segment(aes(x = -82.95, y = 24.75, xend = -82.74, yend = 24.28), linewidth = 0.25) +
geom_rect(aes(xmin = -80.4, xmax = -80.1, ymin = 25, ymax = 25.3), fill = teal, color = "black", linewidth = 0.25, alpha = 0.5) +
geom_rect(aes(xmin = -81.75, xmax = -81.45, ymin = 24.4, ymax = 24.7), fill = teal, color = "black", linewidth = 0.25, alpha = 0.5) +
geom_rect(aes(xmin = -83.25, xmax = -82.95, ymin = 24.45, ymax = 24.75), fill = teal, color = "black", linewidth = 0.25, alpha = 0.5) +
geom_point(data = fknmsPops, aes(x = longDD, y = latDD, fill = site), shape = 21, size = 2) +
coord_sf(xlim = c(-84, -79), ylim = c(23, 27)) +
scale_x_continuous(breaks = c(seq(-84, -79, by = 1))) +
scale_y_continuous(breaks = c(seq(23, 27, by = 1))) +
annotation_scale(location = "br", pad_x = unit(1.35, "cm"), text_pad = unit(-3.75, "cm")) +
guides(fill = guide_legend(override.aes = list(shape = 22, color = "black", size = 2, stroke = 0.25), order = 1), shape = guide_legend(override.aes = list(size = 2, stroke = 0.25), order = 2), color = "none") +
theme_bw() +
theme(panel.background = element_rect(fill = "aliceblue"),
plot.background = element_blank(),
panel.border = element_rect(color = "black", size = 1, fill = NA),
axis.title = element_blank(),
axis.ticks = element_line(color = "black"),
axis.text = element_text(color = "black"),
legend.position = c(0.905, 0.875),
legend.box.background = element_rect(linewidth = 0.35, fill = "white"),
legend.title = element_text(color = "black", size = 8),
legend.text = element_text(color = "black", size = 8),
legend.spacing = unit(-5, "pt"),
legend.key.size = unit(5, "pt"),
legend.background = element_blank()
)
# floridaMap
largeMap = inset = ggplot() +
geom_sf(data = states, fill = "white", linewidth = 0.3) +
geom_sf(data = countries, fill = "white", linewidth = 0.3) +
geom_rect(aes(xmin = -84, xmax = -79, ymin = 23, ymax = 27), color = teal, fill = teal, alpha = 0.25, linewidth = 0.5) +
geom_rect(aes(xmin = -78.8, xmax = -77, ymin = 22.2, ymax = 22.6), fill = "aliceblue", color = NA) +
annotation_scale(location = "bl", pad_x = unit(2.25, "cm")) +
annotation_north_arrow(location = "tr", style = north_arrow_minimal(), pad_x = unit(-0.3, "cm")) +
coord_sf(xlim = c(-87, -76), ylim = c(22, 31)) +
theme_bw() +
theme(legend.title = element_text(size = 9, face = "bold"),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 1, fill = NA),
legend.position = "none",
plot.background = element_blank())
# largeMap
inset = ggplot() +
geom_polygon(data = fknmsBounds[fknmsBounds$type == "Sanctuary",], aes(x = long, y = lat, group = location), alpha = 0.1, fill = "black", color = NA) +
geom_polygon(data = fknmsBounds[fknmsBounds$location == "FKNMS2",], aes(x = long, y = lat), fill = "aliceblue", color = NA) +
geom_segment(aes(x = -82.9645, xend = -82.4, y = 24.6, yend = 24.6), color = "gray92", size = .55) +
geom_sf(data = bathy, color = "gray75", size = 0.25) +
geom_polygon(data = fknmsBounds, aes(x = long, y = lat, color = type, group = location), fill = NA) +
scale_fill_manual(values = flPal, name = "Site") +
scale_color_manual(values = boundPal, name = "Boundaries", labels = c("FKNMS", "SPA")) +
geom_point(data = fknmsSampleSites, aes(x = longDD, y = latDD, fill = site, shape = depthZone), size = 1.5) +
geom_sf(data = florida, fill = "white", size = 0.15) +
scale_shape_manual(values = c(24, 25), name = "Depth") +
theme_bw() +
theme(legend.title = element_text(size = 9, face = "bold"),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 1, fill = NA),
legend.position = "none",
plot.background = element_blank())
# inset
upperKeys = inset +
annotation_scale(location = "bl", pad_x = unit(1.9, "cm")) +
coord_sf(xlim = c(-80.4, -80.1), ylim = c(25.0, 25.3)) +
scale_x_continuous(breaks = c(seq(-80.4, -80.0, by = .1))) +
scale_y_continuous(breaks = c(seq(25.0, 25.3, by = .1)))
lowerKeys = inset +
annotation_scale(location = "bl", pad_x = unit(1.9, "cm")) +
coord_sf(xlim = c(-81.75, -81.45), ylim = c(24.4, 24.7)) +
scale_x_continuous(breaks = c(seq(-81.7, -81.3, by = .1))) +
scale_y_continuous(breaks = c(seq(24.4, 24.7, by = .1)))
dryTortugas = ggplot() +
geom_polygon(data = fknmsBounds[fknmsBounds$type == "Sanctuary",], aes(x = long, y = lat, group = location), alpha = 0.1, fill = "black", color = NA) +
geom_polygon(data = fknmsBounds[fknmsBounds$location == "FKNMS2",], aes(x = long, y = lat), fill = "aliceblue", color = NA) +
geom_segment(aes(x = -82.9645, xend = -82.4, y = 24.6, yend = 24.6), color = "gray92", size = .55) +
geom_sf(data = tortugasBathy, color = "gray75", size = 0.25) +
geom_polygon(data = fknmsBounds, aes(x = long, y = lat, color = type, group = location), fill = NA) +
scale_fill_manual(values = flPal, name = "Site") +
scale_color_manual(values = boundPal, name = "Boundaries", labels = c("FKNMS", "SPA")) +
geom_point(data = fknmsSites, aes(x = longDD, y = latDD, fill = site, shape = depthZone), size = 1.5) +
geom_sf(data = florida, fill = "white", size = 0.25) +
scale_shape_manual(values = c(24, 25), name = "Depth") +
annotation_scale(location = "bl", pad_x = unit(1.9, "cm")) +
coord_sf(xlim = c(-83.25, -82.95), ylim = c(24.45, 24.75)) +
scale_x_continuous(breaks = c(seq(-83.2, -82.9, by = .1))) +
scale_y_continuous(breaks = c(seq(24.4, 24.7, by = .1))) +
theme_bw() +
theme(legend.title = element_text(size = 9, face = "bold"),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 1, fill = NA),
legend.position = "none",
plot.background = element_blank())
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("sample" = tubeID, "pop" = site, "depth" = depthZone, "depthm" = depthM)
popData$popdepth = as.factor(paste(popData$pop, popData$depth, sep = ""))
popData$popdepth = factor(popData$popdepth, levels(popData$popdepth)[c(4, 3, 6, 5, 2, 1, 8, 7)])
pcadmix = read.table("../data/snps/k/ClumppK4.output") %>%dplyr::select(V6, V7, V8, V9)
pcadmix %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 128.9256 43.261 31.258 16.5555
pcadmix = popData %>% cbind(pcadmix) %>% rename("cluster1" = "V6", "cluster2" = "V7", "cluster3" = "V8", "cluster4" = "V9") %>%dplyr::select(order(colnames(.)))
pcadmixClust = pcadmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75 & cluster3 < 0.75 & cluster4 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, ifelse(cluster3 >= 0.75, 3,ifelse(cluster4 >= 0.75, 4, 0))))))
pcadmix = pcadmix %>% mutate(pcadmixClust)
pcadmix$cluster = as.factor(pcadmix$cluster)
levels(pcadmix$cluster) = c("Blue", "Teal", "Green", "Yellow", "Admixed")
siteLineages = pcadmix %>% dplyr::select(popdepth, cluster) %>%
group_by(popdepth) %>% count(cluster) %>% mutate(Freq = n/sum(n)) %>% apply(2, function(x) gsub("\\s+", "", x)) %>% as.data.frame()
pieCol = c("Blue" = kColPal[1], Teal = kColPal[2], "Green" = kColPal[3], "Yellow" = kColPal[4], "Admixed" = kColPal[5])
pieDf = siteLineages %>% group_by(popdepth) %>% mutate("ymax" = cumsum(Freq)) %>% mutate("ymin" = c(0, head(ymax, n=-1)))
ukMeso = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = flPal[1], alpha = 1) +
geom_rect(data = pieDf%>% filter(popdepth == "UpperKeysMesophotic"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "43.9", size = 2.5, fontface = "bold") +
scale_fill_manual(values = pieCol)+
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
lkMeso = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = flPal[2], alpha = 1) +
geom_rect(data = pieDf%>% filter(popdepth == "LowerKeysMesophotic"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "32.8", size = 2.5, fontface = "bold") +
scale_fill_manual(values = pieCol)+
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
tbMeso = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = flPal[3], alpha = 1) +
geom_rect(data = pieDf%>% filter(popdepth == "TortugasBankMesophotic"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "32.0", size = 2.5, fontface = "bold", color = "#ffffff") +
scale_fill_manual(values = pieCol)+
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
rhMeso = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = flPal[4], alpha = 1) +
geom_rect(data = pieDf %>% filter(popdepth == "Riley'sHumpMesophotic"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "33.2", size = 2.5, fontface = "bold", color = "#ffffff") +
scale_fill_manual(values = pieCol)+
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
ukShal = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = "#BCF5F7") +
geom_rect(data = pieDf%>% filter(popdepth == "UpperKeysShallow"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "23.6", size = 2.5, fontface = "bold") +
scale_fill_manual(values = pieCol)+
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
lkShal = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = "#76BDF3") +
geom_rect(data = pieDf%>% filter(popdepth == "LowerKeysShallow"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", , size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "18.0", size = 2.5, fontface = "bold") +
scale_fill_manual(values = pieCol)+
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
tbShal = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = "#A969C9") +
geom_rect(data = pieDf%>% filter(popdepth == "TortugasBankShallow"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "21.1", size = 2.5, fontface = "bold", color = "#ffffff") +
scale_fill_manual(values = pieCol)+
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
rhShal = ggplot() +
geom_rect(aes(ymax=1, ymin = 0, xmax = 4, xmin = 2), fill = "#765DD8") +
geom_rect(data = pieDf %>% filter(popdepth == "Riley'sHumpShallow"), aes(ymax=ymax, ymin = ymin, xmax = 4, xmin = 3, fill = cluster), color = "black", size = 0.25) +
annotate(geom = "text", x = 2, y = 0.75, label = "26.4", size = 2.5, fontface = "bold", color = "#ffffff") +
scale_fill_manual(values = pieCol)+
coord_polar(theta = "y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none", panel.background = element_blank())
map = (floridaMap +
inset_element(largeMap, top = 1.01, right = 0.33, bottom = 0.63, left = -0.005, ignore_tag = TRUE) +
inset_element(dryTortugas, top = 0.36, right = 0.2875, bottom = -0.01, left = -0.0075, ignore_tag = TRUE) +
inset_element(lowerKeys, top = 0.36, right = 0.645, bottom = -0.01, left = 0.35, ignore_tag = TRUE) +
inset_element(upperKeys, top = 0.395, right = 1.00, bottom = 0.025, left = 0.705, ignore_tag = TRUE) +
inset_element(ukShal, top = 0.374, right = 0.99, bottom = 0.274, left = 0.89, ignore_tag = TRUE) +
inset_element(ukMeso, top = 0.284, right = 0.99, bottom = 0.184, left = 0.89, ignore_tag = TRUE) +
inset_element(lkShal, top = 0.209, right = 0.466, bottom = 0.109, left = 0.366, ignore_tag = TRUE) +
inset_element(lkMeso, top = 0.119, right = 0.466, bottom = 0.019, left = 0.366, ignore_tag = TRUE) +
inset_element(rhShal, top = 0.209, right = 0.11, bottom = 0.109, left = 0.01, ignore_tag = TRUE) +
inset_element(rhMeso, top = 0.119, right = 0.11, bottom = 0.019, left = 0.01, ignore_tag = TRUE) +
inset_element(tbShal, top = 0.338, right = 0.278, bottom = 0.238, left = 0.178, ignore_tag = TRUE) +
inset_element(tbMeso, top = 0.248, right = 0.278, bottom = 0.148, left = 0.178, ignore_tag = TRUE)
)
ggsave("../figures/figure1.png", plot = map, height = 7, width = 7, units = "in", dpi = 300)
ggsave("../figures/figure1.svg", plot = map, height = 7, width = 7, units = "in", dpi = 300)
Analyzing 2bRAD generated SNPs (24,670 loci) for population structure//genetic connectivity across sites and depth zones in FKNMS
rawSintReads = read.delim("../data/snps/sintRawReadCounts", header = FALSE)
colnames(rawSintReads) = c("sample", "reads")
head(rawSintReads)
## sample reads
## 1 FKSi1-1 42167284
## 2 FKSi1-2 54651139
## 3 FKSi1-3 41635251
## 4 FKSi1-4 37754282
## 5 FKSi1-5 39973126
## 6 FKSi1-6 45580831
#total reads
sum(rawSintReads$reads)
## [1] 796139328
#average reads/sample
(sum(rawSintReads$reads)/226)
## [1] 3522740
Identification of any natural clones using technical replicates as a baseline for clonality between samples.
cloneBams = read.csv("../data/stephanocoeniaMetaData.csv") # list of bam files
cloneMa = as.matrix(read.table("../data/snps/clones/sintClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(cloneMa) = list(cloneBams[,1],cloneBams[,1])
clonePops = cloneBams$site
cloneDepth = cloneBams$depthZone
cloneDend = cloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
cloneDData = cloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
cloneDData$segments$yend2 = cloneDData$segments$yend
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend2[i] == 0) {
cloneDData$segments$yend2[i] = (cloneDData$segments$y[i] - 0.01)}}
cloneDendPoints = cloneDData$labels
cloneDendPoints$pop = clonePops[order.dendrogram(cloneDend)]
cloneDendPoints$depth=cloneDepth[order.dendrogram(cloneDend)]
rownames(cloneDendPoints) = cloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend[i] == 0) {
point[i] = cloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
cloneDendPoints$y = point[!is.na(point)]
techReps = c("SFK066.1", "SFK066.2", "SFK066.3", "SFK162.1", "SFK162.2", "SFK162.3", "SFK205.1", "SFK205.2", "SFK205.3")
cloneDendPoints$depth = factor(cloneDendPoints$depth)
cloneDendPoints$depth = factor(cloneDendPoints$depth, levels(cloneDendPoints$depth)[c(2,1)])
cloneDendPoints$pop = factor(cloneDendPoints$pop)
cloneDendPoints$pop = factor(cloneDendPoints$pop,levels(cloneDendPoints$pop)[c(4, 1, 3, 2)])
cloneDendA = ggplot() +
geom_rect(aes(xmin = 44.25, xmax = 47.75, ymin = 0.03, ymax = 0.085), fill = teal, alpha = 0.4) +
geom_rect(aes(xmin = 154.25, xmax = 157.75, ymin = 0.065, ymax = 0.12), fill = teal, alpha = 0.4) +
geom_rect(aes(xmin = 172.25, xmax = 175.75, ymin = 0.065, ymax = 0.12), fill = teal, alpha = 0.4) +
geom_segment(data = segment(cloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = cloneDendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
scale_fill_manual(values = flPal, name= "Site:") +
scale_shape_manual(values = c(24, 25), name = "Depth Zone:") +
geom_hline(yintercept = 0.118, color = teal, lty = 5, size = 1) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(cloneDendPoints, subset = label %in% techReps), aes(x = x, y = (y - .02), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(cloneDendPoints, subset = !label %in% techReps), aes(x = x, y = (y - .015), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22, size = 10), ncol = 2), shape = guide_legend(override.aes = list(size = 8), ncol = 1, order = 1)) +
coord_cartesian(xlim = c(5, 218), ylim = c(0.03, 0.25)) +
theme_classic()
cloneDend = cloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 24, color = "black", angle = 90),
axis.text.y = element_text(size = 20, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 24),
legend.text = element_text(size = 20),
legend.position = "bottom")
cloneDend
ggsave("../figures/rmd/cloneDend.png", plot = cloneDend, height = 8, width = 35, units = "in", dpi = 300)
We removed the technical replicates/clones and re-ran ANGSD for all the following pop-gen analyses.
bams = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] # list of bams files and their populations (tech reps removed)
ma = as.matrix(read.table("../data/snps/sintNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ma) = list(bams[,1],bams[,1])
pops = bams$site
depth = bams$depthZone
dend = ma %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
dData = dend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
dData$segments$yend2 = dData$segments$yend
for(i in 1:nrow(dData$segments)) {
if (dData$segments$yend2[i] == 0) {
dData$segments$yend2[i] = (dData$segments$y[i] - 0.01)}}
dendPoints = dData$labels
dendPoints$pop = pops[order.dendrogram(dend)]
dendPoints$depth = depth[order.dendrogram(dend)]
rownames(dendPoints) = dendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(dData$segments)) {
if (dData$segments$yend[i] == 0) {
point[i] = dData$segments$y[i] - 0.01
} else {
point[i] = NA}}
dendPoints$y = point[!is.na(point)]
dendPoints$depth = factor(dendPoints$depth)
dendPoints$depth = factor(dendPoints$depth, levels(dendPoints$depth)[c(2,1)])
dendPoints$pop = factor(dendPoints$pop)
dendPoints$pop = factor(dendPoints$pop, levels(dendPoints$pop)[c(4, 1, 3, 2)])
dendNoCloneA = ggplot() +
geom_segment(data = segment(dData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = dendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
scale_fill_manual(values = flPal, name= "Site:")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone:")+
# spacing technical replicates further from leaf
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22, size = 10), ncol = 2), shape = guide_legend(override.aes = list(size = 8), ncol = 1, order = 1)) +
coord_cartesian(xlim = c(5, 218)) +
theme_classic()
dendNoClone = dendNoCloneA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 24, color = "black", angle = 90),
axis.text.y = element_text(size = 20, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 24),
legend.text = element_text(size = 20),
legend.position = "bottom")
# dend
ma = as.matrix(read.table("../data/snps/sintFiltSnps.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ma) = list(pcadmix[,9],pcadmix[,9])
pops = pcadmix$pop
depth = pcadmix$depth
cluster = pcadmix$cluster
dend = ma %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
dData = dend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
dData$segments$yend2 = dData$segments$yend
for(i in 1:nrow(dData$segments)) {
if (dData$segments$yend2[i] == 0) {
dData$segments$yend2[i] = (dData$segments$y[i] - 0.01)}}
dendPoints = dData$labels
dendPoints$pop = pops[order.dendrogram(dend)]
dendPoints$depth = depth[order.dendrogram(dend)]
dendPoints$cluster = cluster[order.dendrogram(dend)]
rownames(dendPoints) = dendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(dData$segments)) {
if (dData$segments$yend[i] == 0) {
point[i] = dData$segments$y[i] - 0.01
} else {
point[i] = NA}}
dendPoints$y = point[!is.na(point)]
dendPoints$depth = factor(dendPoints$depth)
dendPoints$depth = factor(dendPoints$depth, levels(dendPoints$depth)[c(2,1)])
dendPoints$pop = factor(dendPoints$pop)
dendPoints$pop = factor(dendPoints$pop, levels(dendPoints$pop)[c(4, 1, 3, 2)])
dendPoints$cluster = factor(dendPoints$cluster)
dendLA = ggplot() +
geom_segment(data = segment(dData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = dendPoints, aes(x = x, y = y, fill = cluster, shape = depth), size = 4, stroke = 0.25) +
scale_fill_manual(values = kColPal, name= "Lineage:")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone:")+
# spacing technical replicates further from leaf
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22, size = 10), ncol = 3), shape = guide_legend(override.aes = list(size = 8), ncol = 1, order = 1)) +
coord_cartesian(xlim = c(5, 218)) +
theme_classic()
dendL = dendLA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 24, color = "black", angle = 90),
axis.text.y = element_text(size = 20, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 24),
legend.text = element_text(size = 20),
legend.position = "bottom")
# dendL
dendPlots = (cloneDend / dendNoClone / dendL) +
plot_annotation(tag_levels = 'A') +
plot_layout(guides = "collect") &
theme(plot.tag = element_text(size = 32),
legend.position = "bottom")
ggsave("../figures/figureS1.png", plot = dendPlots, height = 19.5, width = 35, units = "in", dpi = 300)
ggsave("../figures/figureS1.svg", plot = dendPlots, height = 19.5, width = 35, units = "in", dpi = 300)
cov = read.table("../data/snps/fkSintPcangsd.cov") %>% as.matrix()
pcadmix = read.table("../data/snps/k/ClumppK4.output") %>% dplyr::select(V6, V7, V8, V9)
pcadmix %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 128.9256 43.261 31.258 16.5555
pcadmix = pcadmix %>% rename("cluster1" = "V6", "cluster2" = "V7", "cluster3" = "V8", "cluster4" = "V9") %>%dplyr::select(order(colnames(.)))
pcaEig = eigen(cov)
sintPcaVar = pcaEig$values/sum(pcaEig$values)*100
head(sintPcaVar)
## [1] 8.8656594 3.9912640 3.4620011 0.6615153 0.4993853 0.4912081
pcangsd = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("sample" = tubeID, "pop" = site, "depth" = depthZone, "depthm" = depthM)
pcangsd$popdepth = as.factor(paste(pcangsd$pop, pcangsd$depth, sep = " "))
pcangsd$popdepth = factor(pcangsd$popdepth, levels(pcangsd$popdepth)[c(4, 3, 6, 5, 2, 1, 8, 7)])
pcangsd$pop = factor(pcangsd$pop)
pcangsd$pop = factor(pcangsd$pop, levels(pcangsd$pop)[c( 4, 1, 3, 2)])
pcangsd$depth = factor(pcangsd$depth)
pcangsd$depth = factor(pcangsd$depth, levels(pcangsd$depth)[c(2, 1)])
pcangsd$PC1 = pcaEig$vectors[,1]
pcangsd$PC2 = pcaEig$vectors[,2]
pcangsd$PC3 = pcaEig$vectors[,3]
pcangsd$PC4 = pcaEig$vectors[,4]
pcangsdClust = pcadmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75 & cluster3 < 0.75 & cluster4 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, ifelse(cluster3 >= 0.75, 3,ifelse(cluster4 >= 0.75, 4, 0))))))
pcangsd = pcangsd %>% mutate(pcangsdClust)
pcangsd$cluster = as.factor(pcangsd$cluster)
levels(pcangsd$cluster) = c("Blue", "Teal", "Green", "Yellow", "Admixed")
pcangsd = merge(pcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~popdepth, pcangsd, mean), by="popdepth")
pcangsd %>% group_by(depth,cluster) %>% summarize(n = n())
## `summarise()` has grouped output by 'depth'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 3
## # Groups: depth [2]
## depth cluster n
## <fct> <fct> <int>
## 1 Shallow Blue 50
## 2 Shallow Teal 25
## 3 Shallow Green 29
## 4 Shallow Yellow 15
## 5 Shallow Admixed 1
## 6 Mesophotic Blue 81
## 7 Mesophotic Teal 15
## 8 Mesophotic Green 2
## 9 Mesophotic Admixed 2
Plot PCA
pcaTheme = theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "none",
legend.title = element_text(size = 8),
legend.text = element_text(size = 8),
legend.key.size = unit(5, "pt"),
panel.border = element_rect(color = "black", size = 1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
pcaPlot12SA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = pop, shape = depth, color = pop), stroke = 0, size = 2.5, alpha = 0.5, show.legend = FALSE) +
geom_point(data = pcangsd, aes(x = mean.x, y = mean.y, fill = pop, shape = depth), color = "black", size = 2.75, alpha = 1, stroke = 0.25) +
scale_shape_manual(values = c(24, 25), name = "Depth Zone") +
scale_fill_manual(values = flPal, name = "Site") +
scale_color_manual(values = flPal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(sintPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(sintPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = guide_legend(override.aes = list(size = 2, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = flPal, alpha = NA), order = 1, ncol = 1)) +
theme_bw()
pcaPlot12S = pcaPlot12SA +
pcaTheme +
theme(legend.position = c(0.17, 0.23))
pcaPlot12S
pcaPlot12LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", size = 2, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(24,25), name = "Depth Zone") +
scale_fill_manual(values = kColPal, name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(sintPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(sintPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = kColPal, alpha = NA), order = 1, ncol = 1))+
theme_bw()
pcaPlot12L = pcaPlot12LA +
pcaTheme +
theme(legend.position = c(0.12,0.15))
pcaPlot23LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = pcangsd, aes(x = PC3, y = PC2, fill = cluster, shape = depth), color = "black", size = 2, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(24,25), name = "Depth Zone") +
scale_fill_manual(values = kColPal, name = "Lineage") +
labs(x = paste0("PC 3 (", format(round(sintPcaVar[3], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(sintPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = guide_legend(override.aes = list(size = 2, stroke = 0.5, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = kColPal, alpha = NA), order = 1, ncol = 1, byrow = TRUE))+
theme_bw()
pcaPlot23L = pcaPlot23LA +
pcaTheme
Put all plots together
pcaPlots = ((pcaPlot12S + theme(axis.title.y = element_text(margin = ggplot2::margin(r = -20, unit = "pt")))) | pcaPlot12L | pcaPlot23L) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
panel.background = element_rect(fill = "white"),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
pcaPlots
Prepare admixture outputs for plotting
fkSintAdmix = pcangsd %>%dplyr::select(-PC1, -PC2, -PC3, -PC4, -cluster, -depthm, -mean.x, -mean.y)
fkSintAdmix$pop = factor(fkSintAdmix$pop, levels(fkSintAdmix$pop)[c( 4, 3, 2, 1)])
fkSintAdmix = arrange(fkSintAdmix, pop, depth, -cluster1, -cluster2, cluster3)
popCounts = fkSintAdmix %>% group_by(pop, depth) %>% summarise(n = n())
## `summarise()` has grouped output by 'pop'. You can override using the `.groups`
## argument.
popCounts
## # A tibble: 8 × 3
## # Groups: pop [4]
## pop depth n
## <fct> <fct> <int>
## 1 Riley's Hump Shallow 30
## 2 Riley's Hump Mesophotic 15
## 3 Tortugas Bank Shallow 30
## 4 Tortugas Bank Mesophotic 25
## 5 Lower Keys Shallow 30
## 6 Lower Keys Mesophotic 30
## 7 Upper Keys Shallow 30
## 8 Upper Keys Mesophotic 30
popCountList = reshape2::melt(lapply(popCounts$n,function(x){c(1:x)}))
fkSintAdmix$ord = popCountList$value
fkSintAdmixMelt = melt(fkSintAdmix, id.vars=c("sample", "pop", "depth", "popdepth", "ord"), variable.name="Ancestry", value.name="Fraction")
fkSintAdmixMelt$Ancestry = factor(fkSintAdmixMelt$Ancestry)
fkSintAdmixMelt$Ancestry = factor(fkSintAdmixMelt$Ancestry, levels = rev(levels(fkSintAdmixMelt$Ancestry)))
popAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5),
y1 = -0.1, y2 = -0.1, sample = NA, Ancestry = NA, depth = "Mesophotic",
ord = NA, Fraction = NA,
pop = c("Riley's Hump", "Tortugas Bank",
"Lower Keys", "Upper Keys"))
popAnno$pop = factor(popAnno$pop)
popAnno$pop = factor(popAnno$pop, levels = levels(popAnno$pop)[c(4, 1, 3, 2)])
Make admixture plots
admixPlotA = ggplot(data = fkSintAdmixMelt, aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = popAnno, aes(x = x1, xend = x2, y = -.12, yend = -.12, color = pop), size = 7) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(factor(depth) ~ pop, switch = "both") +
geom_text(data = (fkSintAdmixMelt %>% filter(depth == "Mesophotic", pop %in% c("Riley's Hump", "Tortugas Bank"), sample %in% c(
"SFK001", "SFK100"), Ancestry == "cluster1")), x = 15.5, y = -.1, aes(label = pop), size = 3.5, color = "#FFFFFF") +
geom_text(data = (fkSintAdmixMelt %>% filter(depth == "Mesophotic", pop %in% c("Lower Keys", "Upper Keys"), sample %in% c(
"SFK101", "SFK201"), Ancestry == "cluster1")), x = 15.5, y = -.1, aes(label = pop), size = 3.5, color = "#000000") +
scale_fill_manual(values = kColPal) +
scale_color_manual(values = flPal) +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
admixPlot = admixPlotA +
theme_bw()+
theme(plot.title = element_text(hjust = 0.5, size = 10),
panel.grid = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
panel.border = element_rect(fill = NA, color = "black", size = 0.5, linetype = "solid"),
panel.spacing.x = grid:::unit(0.0125, "lines"),
panel.spacing.y = grid:::unit(0.0125, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 8),
strip.text.y.left = element_text(size = 10, angle = 90),
strip.text.x.bottom = element_text(vjust = 1, color = NA),
legend.key = element_blank(),
legend.position = "none",
legend.title = element_text(size = 8))
# admixPlot
Calculating differentiation with Pairwise Fst
gunzip("../data/snps/sintNoClones.bcf.gz")
sintVcf = read.vcfR("../data/snps/sintNoClones.bcf", verbose = FALSE)
gzip("../data/snps/sintNoClones.bcf")
sintGenlightPopDepth = vcfR2genlight(sintVcf, n.cores = 8) # Converts the vcf file into a file format that poppr uses the "genlight" format
locNames(sintGenlightPopDepth) = paste(sintVcf@fix[,1],sintVcf@fix[,2],sep="_")
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("sample" = tubeID, "pop" = site, "depth" = depthZone) # Reads in population data for each sample
popData$popdepth = paste(popData$pop, popData$depth, sep = " ")
strata(sintGenlightPopDepth) = data.frame(popData)
setPop(sintGenlightPopDepth) = ~popdepth
sintGenlightPopDepth$pop = factor(sintGenlightPopDepth$pop)
sintGenlightPopDepth$pop = factor(sintGenlightPopDepth$pop,
levels(sintGenlightPopDepth$pop)[c(7, 8, 6, 5, 2, 1, 4, 3)])
levels(sintGenlightPopDepth$pop) = c("Upper Keys\nShallow", "Upper Keys\nMesophotic", "Lower Keys\nShallow", "Lower Keys\nMesophotic", "Tortugas Bank\nShallow", "Tortugas Bank\nMesophotic", "Riley's Hump\nShallow", "Riley's Hump\nMesophotic")
set.seed(694)
fk.fst <- stamppFst(sintGenlightPopDepth, nboots = 1, percent = 95, nclusters = 8)
fk.fst
## Tortugas Bank\nMesophotic Tortugas Bank\nShallow
## Tortugas Bank\nMesophotic NA NA
## Tortugas Bank\nShallow 0.021928230 NA
## Riley's Hump\nMesophotic 0.002397382 0.012550165
## Riley's Hump\nShallow 0.010785004 0.006637748
## Lower Keys\nMesophotic 0.002930507 0.029957697
## Lower Keys\nShallow 0.021249108 0.005407182
## Upper Keys\nShallow 0.019104757 0.014108148
## Upper Keys\nMesophotic 0.001165877 0.027396216
## Riley's Hump\nMesophotic Riley's Hump\nShallow
## Tortugas Bank\nMesophotic NA NA
## Tortugas Bank\nShallow NA NA
## Riley's Hump\nMesophotic NA NA
## Riley's Hump\nShallow 0.0003676029 NA
## Lower Keys\nMesophotic 0.0078265244 0.013753336
## Lower Keys\nShallow 0.0061114733 -0.002212034
## Upper Keys\nShallow 0.0049598707 -0.001557742
## Upper Keys\nMesophotic 0.0052272451 0.012581289
## Lower Keys\nMesophotic Lower Keys\nShallow
## Tortugas Bank\nMesophotic NA NA
## Tortugas Bank\nShallow NA NA
## Riley's Hump\nMesophotic NA NA
## Riley's Hump\nShallow NA NA
## Lower Keys\nMesophotic NA NA
## Lower Keys\nShallow 0.0255855830 NA
## Upper Keys\nShallow 0.0211184139 -0.001759316
## Upper Keys\nMesophotic -0.0002008376 0.024025484
## Upper Keys\nShallow Upper Keys\nMesophotic
## Tortugas Bank\nMesophotic NA NA
## Tortugas Bank\nShallow NA NA
## Riley's Hump\nMesophotic NA NA
## Riley's Hump\nShallow NA NA
## Lower Keys\nMesophotic NA NA
## Lower Keys\nShallow NA NA
## Upper Keys\nShallow NA NA
## Upper Keys\nMesophotic 0.019923 NA
Now organize the data to plot
pop.order = c("Riley's Hump\nMesophotic", "Riley's Hump\nShallow", "Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Lower Keys\nMesophotic", "Lower Keys\nShallow", "Upper Keys\nMesophotic", "Upper Keys\nShallow")
# reads in fst matrix
snpFstMa <- as.matrix(fk.fst)
upperTriangle(snpFstMa, byrow = TRUE) <- lowerTriangle(snpFstMa)
snpFstMa <- snpFstMa[,pop.order] %>%
.[pop.order,]
snpFstMa[upper.tri(snpFstMa)] <- NA
snpFstMa <- as.data.frame(snpFstMa)
# rearrange and reformat matrix
snpFstMa$Pop = factor(row.names(snpFstMa), levels = unique(pop.order))
# melt matrix data (turn from matrix into long dataframe)
snpFstMa$Pop = factor(row.names(snpFstMa), levels = unique(pop.order))
snpFst = melt(snpFstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)
snpFst$Fst = round(snpFst$Fst, 3)
snpFst = snpFst %>% mutate(Fst = replace(Fst, Fst < 0, 0))
snpFst$site = snpFst$Pop
snpFst$site = factor(gsub("\\n.*", "", snpFst$site))
snpFst$site = factor(snpFst$site, levels = levels(snpFst$site)[c(4, 1, 3, 2)])
snpFst$site2 = snpFst$Pop2
snpFst$site2 = factor(gsub("\\n.*", "", snpFst$site2))
snpFst$site2 = factor(snpFst$site2, levels = levels(snpFst$site2)[c(4, 1, 3, 2)])
snpFst$Fst = sprintf('%.3f', snpFst$Fst)
snpFst$Fst = factor(gsub("\\NA", NA, snpFst$Fst))
snpFst$Fst = factor(gsub("\\.000", "", snpFst$Fst))
snpFst$Fst = factor(gsub("\\-", "", snpFst$Fst))
levels(snpFst$Pop) = c("RH\nMeso", "RH\nShallow", "TB\nMeso", "TB\nShallow", "LK\nMeso", "LK\nShallow", "UK\nMeso", "UK \nShallow")
levels(snpFst$Pop2) = c("RH\nMeso", "RH\nShallow", "TB\nMeso", "TB\nShallow", "LK\nMeso", "LK\nShallow", "UK\nMeso", "UK \nShallow")
Plotting fst data as a heatmap
snpHeatmapA = ggplot(data = snpFst, aes(Pop, Pop2, fill = as.numeric(as.character(Fst)))) +
geom_tile(color = "white") +
geom_segment(data = snpFst, aes(x = 0.475, xend = -0.5, y = Pop, yend = Pop, color = site), size = 9.75) +
geom_segment(data = snpFst, aes(x = Pop2, xend = Pop2, y = 0.45, yend = -0.55, color = site2), size = 16.75) +
scale_color_manual(values = flPal, guide = NULL) +
scale_fill_gradient(low = "gray95", high = "gray35", limit = c(0, 0.03), space = "Lab", name = expression(paste(italic("F")[ST])), na.value = "white", guide = "colourbar") +
geom_text(data = snpFst, aes(Pop, Pop2, label = Fst), color = "black", size = 3.25) +
guides(fill = guide_colorbar(barwidth = 6, barheight = 0.5, title.position = "top", title.hjust = 0.5)) +
geom_text(data = (snpFst %>% filter(site %in% c("Riley's Hump", "Tortugas Bank"), Pop2 == "RH\nMeso")), y = -.05, aes(x = Pop, label = Pop), size = 3, color = "#FFFFFF", family = "sans") +
geom_text(data = (snpFst %>% filter(site %in% c("Lower Keys", "Upper Keys"), Pop2 == "LK\nMeso")), y = -.05, aes(x = Pop, label = Pop), size = 3, color = "#000000", family = "sans") +
geom_text(data = (snpFst %>% filter(site %in% c("Riley's Hump", "Tortugas Bank"), Pop2 == "RH\nMeso")), x = -.025, aes(y = Pop, label = Pop), size = 3, color = "#FFFFFF", family = "sans") +
geom_text(data = (snpFst %>% filter(site %in% c("Lower Keys", "Upper Keys"), Pop2 == "LK\nMeso")), x = -.025, aes(y = Pop, label = Pop), size = 3, color = "#000000", family = "sans") +
scale_y_discrete(position = "left") +
scale_x_discrete(limits = rev(levels(snpFst$Pop))[c(1:8)]) +
coord_cartesian(xlim = c(1, 8), ylim = c(1, 8), clip = "off") +
theme_minimal()
snpHeatmap = snpHeatmapA + theme(
axis.text.x = element_text(vjust = 2, size = 10, hjust = 0.5, color = NA),
axis.text.y = element_text(size = 10, color = NA, margin = margin(r = -5)),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.ticks = element_blank(),
legend.position = c(0.5, 0.9),
legend.direction = "horizontal",
legend.title = element_text(size = 10),
legend.text = element_text(size = 8)
)
# snpHeatmap
structurePlots = ((pcaPlots / (admixPlot | snpHeatmap))) +
plot_layout(widths = c( 1, 1.5)) +
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(size = 16, face = "plain"))
ggsave("../figures/figure2.png", plot = structurePlots, height = 6.5, width = 10, units = "in", dpi = 300)
ggsave("../figures/figure2.svg", plot = structurePlots, height = 6.5, width = 10, units = "in", dpi = 300)
leveneTest(lm(depthm ~ cluster, data = subset(pcangsd, subset = pcangsd$cluster!="Admixed")))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 6.9537 0.000174 ***
## 213
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
depthAov = welch_anova_test(depthm ~ cluster, data = subset(pcangsd, subset = pcangsd$cluster!="Admixed"))
dF = depthAov$statistic[[1]]
depthPH = games_howell_test(depthm ~ cluster, data = subset(pcangsd, subset = pcangsd$cluster!="Admixed"), conf.level = 0.95) %>% as.data.frame()
depthLetters = data.frame(x = factor(c("Blue", "Teal", "Green", "Yellow")), y = c(2.5, 2.5, 2.5, 2.5), grp = c("a", "b", "bc", "c"))
lineageViolinA = ggplot(data = subset(pcangsd, subset = pcangsd$cluster!="Admixed"), aes(x = cluster, y = depthm)) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = 30, ymax = Inf, fill = "black", alpha = 0.15, color = NA) +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 1.5, alpha = 0.5) +
geom_violin(aes(fill = cluster),adjust = 1, linewidth = 0, color = "black", alpha = 0.35, width = 0.9, trim = F, scale = "width") +
geom_violin(adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "width") +
geom_boxplot(width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) +
geom_text(data = depthLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 3.75, y =50, label = bquote(italic("F")~" = "~.(dF)*"; "~italic("p")~" < 0.001"), size = 3) +
scale_fill_discrete(type = kColPal, name = "Lineage") +
scale_color_discrete(type = kColPal, name = "Lineage") +
xlab("Lineage") +
ylab("Depth (m)") +
scale_y_reverse(breaks = seq(5, 50, 5)) +
theme_bw()
lineageViolin = lineageViolinA + theme(
axis.title = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
legend.position = "none",
legend.key.size = unit(0.3, 'cm'),
panel.border = element_rect(color = "black", size = 1),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
lineageViolin
Measuring with global weighted FST calculated from SFS
First prepare and sort FST for plotting
### FST ###
pop.order = c("Blue", "Teal", "Green", "Yellow")
# reads in fst matrix
fstMa1 <- read.csv("../data/snps/kFst.csv")
row.names(fstMa1) <- fstMa1$pop1
fstMa = fstMa1 %>%dplyr::select(-pop1) %>% as.matrix()
upperTriangle(fstMa, byrow = TRUE) <- lowerTriangle(fstMa)
fstMa <- fstMa[,pop.order] %>%
.[pop.order,]
fstMa[upper.tri(fstMa)] <- NA
fstMa <- as.data.frame(fstMa)
# rearrange and reformat matrix
fstMa$Pop = factor(row.names(fstMa), levels = unique(pop.order))
# melt matrix data (turn from matrix into long dataframe)
fst = melt(fstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)
fst$Fst = round(fst$Fst, 3)
fst = fst %>% mutate(Fst = replace(Fst, Fst < 0, 0))
fst$site = fst$Pop
fst$site = factor(gsub("\\n.*", "", fst$site))
fst$site = factor(fst$site, levels = levels(fst$site)[c(1, 3, 2, 4)])
fst$site2 = fst$Pop2
fst$site2 = factor(gsub("\\n.*", "", fst$site2))
fst$site2 = factor(fst$site2, levels = levels(fst$site2)[c(1, 3, 2, 4)])
fst$Pop2 = factor(fst$Pop2, levels = levels(fst$Pop2)[c(4, 3, 2, 1)])
fst$Fst = sprintf('%.3f', fst$Fst)
fst$Fst = factor(gsub("\\NA", NA, fst$Fst))
fst$Fst = factor(gsub("\\.000", "", fst$Fst))
fst$Fst = factor(gsub("\\-", "", fst$Fst))
Construct a heatmap of FST values
fstHeatmapA = ggplot(data = fst, aes(Pop, Pop2, fill = as.numeric(as.character(Fst)))) +
geom_tile(color = "white") +
geom_segment(data = fst, aes(x = 0.475, xend = 0.15, y = Pop2, yend = Pop2, color = site2), size = 21.25) + #colored boxes for Y-axis labels
geom_segment(data = fst, aes(x = Pop, xend = Pop, y = 0.2, yend = 0.475, color = site), size = 25) + #colored boxes for X-axis labels
scale_color_manual(values = kColPal, guide = NULL) +
scale_fill_gradient(low = "gray95", high = "gray35", limit = c(0, 0.23), space = "Lab", name = expression(paste(italic("F")[ST])), na.value = NA, guide = "colourbar") +
geom_text(data = fst, aes(Pop, Pop2, label = Fst), color = "black", size = 3.5) +
guides(fill = guide_colorbar(barwidth = 7.5, barheight = 0.75, title.position = "top", title.hjust = 0.5)) +
scale_y_discrete(position = "left", limits = rev(levels(fst$Pop2))) +
scale_x_discrete(limits = levels(fst$Pop2)[c(1:4)]) +
coord_cartesian(xlim = c(1, 4), ylim = c(1, 4), clip = "off") +
theme_minimal()
fstHeatmap = fstHeatmapA + theme(
axis.text.x = element_text(vjust = 3.5, size = 10, hjust = 0.5, color = "black"),
axis.text.y = element_text(size = 10, color = "black", angle = 90, hjust = 0.5, vjust = -1.5),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.ticks = element_blank(),
legend.title = element_text(size = 8, color = "black"),
legend.text = element_text(size = 8, color = "black"),
legend.position = c(0.6, 0.9),
legend.direction = "horizontal",
plot.background = element_blank(),
panel.background = element_blank(),
)
fstHeatmap
Making stairway plot of effective population sizes of each lineage throughout time
bl = read.table("../data/snps/sintBlue.final.summary", header = TRUE) %>% mutate("Lineage" = "Blue")
tl = read.table("../data/snps/sintTeal.final.summary", header = TRUE) %>% mutate("Lineage" = "Teal")
gn = read.table("../data/snps/sintGreen.final.summary", header = TRUE) %>% mutate("Lineage" = "Green")
yl = read.table("../data/snps/sintYellow.final.summary", header = TRUE) %>% mutate("Lineage" = "Yellow")
swData = rbind(bl, tl, gn, yl)
swData$Lineage = factor(swData$Lineage)
swData$Lineage = factor(swData$Lineage, levels = levels(swData$Lineage)[c(1,3,2,4)])
Constuct plot
swPlotA = ggplot(data = swData, aes(x = year, y = Ne_median, ymin = Ne_12.5., ymax = Ne_87.5., color = Lineage, fill = Lineage)) +
geom_ribbon(color = NA, aes(alpha = Lineage)) +
# geom_line(size = 0.6) +
geom_line(linewidth = 1.15) +
scale_fill_manual(values = kColPal[c(1:4)]) +
scale_color_manual(values = kColPal[c(1:4)]) +
scale_alpha_manual(values = c(0.25, 0.25, 0.35, 0.4)) +
scale_x_continuous(name = "KYA", limits = c(0,5.25e5), breaks = c(1e5,2e5,3e5,4e5,5e5), labels = c("100","200", "300", "400", "500")) +
scale_y_continuous(name = bquote(italic(N[e])~"(x10"^"3"*")"), limits = c(0,14e5), breaks = c(2.5e5,5e5,7.5e5,10e5,12.5e5), labels = c("250","500", "750", "1000", "1250"))+
coord_cartesian(xlim = c(5.25e5, 0), expand = FALSE) +
theme_bw()
swPlot = swPlotA + theme(
axis.title = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
legend.key.size = unit(0.3, 'cm'),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(color = "black", size = 12),
legend.position = "none",
# legend.position = c(0.85, 0.82),
plot.background = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(size = 1),
panel.grid = element_blank()
)
swPlot
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("sample" = tubeID, "Site" = site, "Depth" = depthZone, "depthm" = depthM) # Reads in population data
popData$a = c(0:219)
popData$Site = factor(popData$Site)
popData$Site = factor(popData$Site, levels = levels(popData$Site)[c(2,3,1,4)])
popData$Depth = factor(popData$Depth)
popData$Depth = factor(popData$Depth, levels = levels(popData$Depth)[c(2,1)])
sampleData = fknmsSites[-c(66,68,164,166,209,211),] %>% group_by(site, depthZone)%>% summarise(depthZone = (first(depthZone)), depthRange = paste(min(depthM), "--", max(depthM), sep = ""), meanDepth = round(mean(depthM),1), n = n())%>% droplevels() %>% as.data.frame()
## `summarise()` has grouped output by 'site'. You can override using the `.groups`
## argument.
# Average depth of populations
fkPopDepths = fknmsSites[-c(66,68,164,166,209,211),] %>% group_by(site, depthZone) %>% summarise(avgDepthM = mean(depthM), n = n())
## `summarise()` has grouped output by 'site'. You can override using the `.groups`
## argument.
fkPopDepths
## # A tibble: 8 × 4
## # Groups: site [4]
## site depthZone avgDepthM n
## <fct> <fct> <dbl> <int>
## 1 Upper Keys Shallow 23.6 30
## 2 Upper Keys Mesophotic 43.8 30
## 3 Lower Keys Shallow 18.0 30
## 4 Lower Keys Mesophotic 32.8 30
## 5 Tortugas Bank Shallow 21.1 30
## 6 Tortugas Bank Mesophotic 32.0 25
## 7 Riley's Hump Shallow 26.4 30
## 8 Riley's Hump Mesophotic 33.2 15
sampleTab = sampleData
colnames(sampleTab) = c("Site", "Depth zone", "Sampling \ndepth (m)", "Average \ndepth (m)", "n")
sampleTab$Site
## [1] Upper Keys Upper Keys Lower Keys Lower Keys Tortugas Bank Tortugas Bank
## [7] Riley's Hump Riley's Hump
## Levels: Upper Keys Lower Keys Tortugas Bank Riley's Hump
finalTabSite = c("Upper Keys", "", "Lower Keys","", "Tortugas Bank", "", "Riley's Hump", "")
sampleTab$Site = finalTabSite
hetAll = read.table("../data/snps/sintHet")
colnames(hetAll) = c("sample", "He")
hetAll$sample = str_pad(hetAll$sample, 3, pad = "0")
hetAll$sample = paste("SFK",hetAll$sample, sep ="")
sintBreed = read.delim("../data/snps/fkSint.indF", header = FALSE)
sintRelate = read.delim("../data/snps/filtRelate")
sintRelate2 = sintRelate %>% group_by(a, b) %>% dplyr::select("Rab" = rab, "theta" = theta)
## Adding missing grouping variables: `a`, `b`
sintRelate2 = sintRelate2 %>% left_join(popData, by = "a") %>% left_join(popData, by = c("b" = "a"), suffix = c(".a", ".b"))
sintRelate2$popDepth.a = paste(sintRelate2$Site.a, sintRelate2$Depth.a, sep = " ")
sintRelate2$popDepth.b = paste(sintRelate2$Site.b, sintRelate2$Depth.b, sep = " ")
sintRelate2 = sintRelate2 %>% left_join((pcangsd %>%dplyr::select(sample, cluster)) , by = c("sample.a" = "sample")) %>% left_join((pcangsd %>%dplyr::select(sample, cluster)) , by = c("sample.b" = "sample"))
sintRelate = sintRelate2 %>% filter(cluster.x != "Admixed",cluster.x == cluster.y) %>% rename(Depth = Depth.a, Site = Site.a, cluster = cluster.x)
sintRelateMean = sintRelate %>% group_by(Site, Depth) %>% dplyr::summarize(N = n(), meanRab = mean(Rab), seRab = sd(Rab)/sqrt(N), meanTheta = mean(theta), seTheta = sd(theta)/sqrt(N)) %>% dplyr::select(-N)
## `summarise()` has grouped output by 'Site'. You can override using the `.groups`
## argument.
het = left_join(popData, hetAll, by = "sample") %>% mutate("inbreed" = sintBreed$V1) %>% left_join((pcangsd %>% dplyr::select(sample, cluster)) , by = "sample") %>% dplyr::select(-a)
hetStats = het %>% group_by(Site, Depth) %>% summarise(N = n(), meanAll = mean(He), sdAll = sd(He), seAll = sd(He)/sqrt(N), meanInbreed = mean(inbreed), sdInbreed = sd(inbreed), seInbreed = sd(inbreed)/sqrt(N)) %>% left_join(sintRelateMean)
## `summarise()` has grouped output by 'Site'. You can override using the `.groups`
## argument.
## Joining with `by = join_by(Site, Depth)`
min(hetStats$meanAll, na.rm = TRUE)
## [1] 0.00251
max(hetStats$meanAll, na.rm = TRUE)
## [1] 0.002776667
hetTab = hetStats %>% arrange(desc(Site))
hetTab$n = hetTab$N
hetTab$Ha = paste(round(hetTab$meanAll, 4), "±", round(hetTab$seAll, 5), sep = " ")
hetTab$F = paste(round(hetTab$meanInbreed, 2), "±", round(hetTab$seInbreed, 3), sep = " ")
hetTab$Rab = paste(round(hetTab$meanRab, 2), "±", round(hetTab$seRab, 3), sep = " ")
hetTab$Theta = paste(round(hetTab$meanTheta, 2), "±", round(hetTab$seTheta, 4), sep = " ")
hetTab$`Sampling \ndepth (m)` = sampleTab$`Sampling \ndepth (m)`
hetTab$`Average \ndepth (m)` = sampleTab$`Average \ndepth (m)`
hetTab = hetTab %>% dplyr::select(Site, Depth, `Sampling \ndepth (m)`, `Average \ndepth (m)`, n, Ha, F, Rab, Theta)
colnames(hetTab)[2] = "Depth \nzone"
finalTabSite = c("Upper Keys", "", "Lower Keys", "", "Tortugas Bank", "", "Riley's Hump", "")
hetTab$Site = finalTabSite
hetTabPub = hetTab %>% dplyr::select(-Theta) %>%
flextable() %>%
flextable::compose(part = "header", j = "n", value = as_paragraph(as_i("n"))) %>%
flextable::compose(part = "header", j = "Ha", value = as_paragraph(as_i("H"), as_sub("A"))) %>%
flextable::compose(part = "header", j = "F", value = as_paragraph(as_i("F"))) %>%
flextable::compose(part = "header", j = "Rab", value = as_paragraph(as_i("R"), as_i(as_sub("AB")))) %>%
flextable::font(fontname = "Times New Roman", part = "all") %>%
flextable::fontsize(size = 10, part = "all") %>%
flextable::bold(part = "header") %>%
flextable::align(align = "left", part = "all") %>%
flextable::autofit()
table2 = read_docx()
table2 = body_add_flextable(table2, value = hetTabPub)
print(table2, target = "../tables/table2.docx")
hetTabPub
Site | Depth | Sampling | Average | n | HA | F | RAB |
---|---|---|---|---|---|---|---|
Upper Keys | Shallow | 18.3--28.3 | 23.6 | 30 | 0.0026 ± 0.00004 | 0.23 ± 0.014 | 0.06 ± 0.004 |
Mesophotic | 41.8--45.4 | 43.9 | 30 | 0.0027 ± 0.00004 | 0.17 ± 0.005 | 0.03 ± 0.001 | |
Lower Keys | Shallow | 17.1--19.2 | 18.0 | 30 | 0.0025 ± 0.00003 | 0.24 ± 0.013 | 0.09 ± 0.004 |
Mesophotic | 31.4--35.1 | 32.8 | 30 | 0.0028 ± 0.00003 | 0.15 ± 0.005 | 0.03 ± 0 | |
Tortugas Bank | Shallow | 14.6--29.9 | 21.1 | 30 | 0.0026 ± 0.00005 | 0.23 ± 0.015 | 0.07 ± 0.002 |
Mesophotic | 30.2--35.1 | 32.0 | 25 | 0.0028 ± 0.00013 | 0.17 ± 0.009 | 0.04 ± 0.001 | |
Riley's Hump | Shallow | 25.3--28 | 26.4 | 30 | 0.0026 ± 0.00004 | 0.22 ± 0.013 | 0.06 ± 0.002 |
Mesophotic | 30.8--37.5 | 33.2 | 15 | 0.0027 ± 0.00005 | 0.19 ± 0.014 | 0.05 ± 0.002 |
Heterozygosity across all RAD loci by lineage
leveneTest(lm(He ~ cluster, data = subset(het, subset = pcangsd$cluster!="Admixed")))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 16.544 0.00000000000817 ***
## 212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hetAov = welch_anova_test(He ~ cluster, data = subset(het, subset = het$cluster!="Admixed"))
hF = hetAov$statistic[[1]]
hetPH = games_howell_test(He ~ cluster, data = subset(het, subset = het$cluster!="Admixed"), conf.level = 0.95) %>% as.data.frame()
hetLetters = data.frame(x = factor(c("Blue", "Teal", "Green", "Yellow")), y = c(0.0039, 0.0039, 0.0039, 0.0039), grp = c("a", "bc", "b", "c"))
hetPlotKA = ggplot(data = het %>% filter(cluster != "Admixed"), aes(x = cluster, y = He)) +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 0.75, alpha = 0.5) +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0, color = "black", alpha = 0.35, width = 0.9, trim = F, scale = "width") +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "width") +
geom_boxplot(aes(fill = cluster, group = cluster), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) + xlab("Lineage") +
geom_text(data = hetLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 3.65, y =0.0036, label = bquote(italic("F")~" = "~.(hF)*"; "~italic("p")~" < 0.001"), size = 3) +
scale_fill_discrete(type = kColPal, name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
scale_y_continuous(breaks = seq(0.0022, 0.0038, 0.0004)) +
coord_cartesian(expand = TRUE, xlim = c(0.85, 4)) +
theme_bw()
hetPlotK = hetPlotKA + theme(
axis.title = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
legend.position = "none",
legend.key.size = unit(0.3, 'cm'),
panel.border = element_rect(color = "black", size = 1),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# hetPlotK
leveneTest(lm(inbreed ~ cluster, data = subset(het, subset = pcangsd$cluster!="Admixed")))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 13.535 0.0000000007639 ***
## 212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ibAov = welch_anova_test(inbreed ~ cluster, data = subset(het, subset = het$cluster!="Admixed"))
iF = ibAov$statistic[[1]]
ibPH = games_howell_test(inbreed ~ cluster, data = subset(het, subset = het$cluster!="Admixed"), conf.level = 0.95) %>% as.data.frame()
inbreedLetters = data.frame(x = factor(c("Blue", "Teal", "Green", "Yellow")), y = c(0.38, 0.38, 0.38, 0.38), grp = c("a", "b", "c", "d"))
inbreedingPlot = ggplot(data = het %>% filter(cluster!="Admixed"), aes(x = cluster, y = inbreed)) +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 1.5, alpha = 0.5) +
geom_violin(aes(fill = cluster), adjust = 1, linewidth = 0, color = "black", alpha = 0.35, width = 0.9, trim = F, scale = "width") +
geom_violin(adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "width") +
geom_boxplot(aes(fill = cluster),width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) +
geom_text(data = inbreedLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 3.6, y =0.032, label = bquote(italic("F")~" = "~.(iF)*"; "~italic("p")~" < 0.001"), size = 3) +
xlab("Lineage") +
ylab(bquote(~"Inbreeding coefficient ("*italic(F)*")")) +
scale_fill_manual(values = kColPal) +
scale_color_manual(values = kColPal) +
scale_y_continuous(breaks=seq(0, 0.4, by = .05)) +
coord_cartesian(expand = TRUE, xlim = c(0.78, 4)) +
theme_bw() +
theme(legend.position = "none",
axis.text = element_text(size = 10, color = "black"),
axis.title = element_text(size = 12, color = "black"),
panel.border = element_rect(color = "black", size = 1),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# inbreedingPlot
# mean depth for lineages
subset(pcangsd, subset = pcangsd$cluster!="Admixed") %>% group_by(cluster) %>% summarize(depth = mean(depthm))
## # A tibble: 4 × 2
## cluster depth
## <fct> <dbl>
## 1 Blue 31.4
## 2 Teal 26.3
## 3 Green 22.5
## 4 Yellow 20.2
npgList = list(read_tsv("../data/snps/blue.thetas.idx.pestPG") %>% mutate(lineage = "Blue", depth = 31.4),
read_tsv("../data/snps/teal.thetas.idx.pestPG") %>% mutate(lineage = "Teal", depth = 26.3),
read_tsv("../data/snps/green.thetas.idx.pestPG") %>% mutate(lineage = "Green", depth = 22.5),
read_tsv("../data/snps/yellow.thetas.idx.pestPG")%>% mutate(lineage = "Yellow", depth = 20.2))
## Rows: 30 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
piAll = purrr::reduce(npgList, rbind) %>%
group_by(lineage) %>%
mutate(tPps = tP/nSites) %>%
summarize(tPps = mean(tPps), depth = min(depth))
piAll$lineage = as.factor(piAll$lineage)
piAll$lineage = factor(piAll$lineage, levels(piAll$lineage)[c(1, 3, 2, 4)])
lmpi = lm(tPps~depth, data=piAll)
summary(lmpi)
##
## Call:
## lm(formula = tPps ~ depth, data = piAll)
##
## Residuals:
## 1 2 3 4
## 0.000046806 -0.000204311 -0.000008903 0.000166408
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.00322104 0.00056843 5.667 0.0298 *
## depth 0.00007664 0.00002233 3.432 0.0754 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0001893 on 2 degrees of freedom
## Multiple R-squared: 0.8549, Adjusted R-squared: 0.7823
## F-statistic: 11.78 on 1 and 2 DF, p-value: 0.07542
r2 = round(summary(lmpi)$r.squared, 3)
nuclDivPlot = ggplot(piAll, aes(x = depth, y = tPps)) +
geom_smooth(se = FALSE, color = 'black', method='lm', linewidth = 0.75) +
geom_point(aes(fill = lineage),shape = 21, size = 3) +
scale_color_manual(values = kColPal) +
scale_fill_manual(values = kColPal) +
labs(x='Depth (m)', y = bquote("Nucleotide diversity ("*pi*")"), shape = 'Lineage') +
annotate(geom = "text", x = 30, y = 0.0048, label = bquote(italic(R^2)~"="~.(r2)), size = 3) +
theme_bw() +
theme(axis.title.y = element_text(color = "black", size = 12),
axis.title.x = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
legend.position = "none",
legend.key.size = unit(0.3, 'cm'),
panel.border = element_rect(color = "black", size = 1),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
nuclDivPlot
## `geom_smooth()` using formula = 'y ~ x'
lineagePlots = (lineageViolin | hetPlotK | inbreedingPlot) / (nuclDivPlot | swPlot | fstHeatmap) +
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(size = 14))
# lineagePlots
ggsave("../figures/figure3.png", plot = lineagePlots, height = 7, width = 12, units = "in", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
ggsave("../figures/figure3.svg", plot = lineagePlots, height = 7, width = 12, units = "in", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
bayescan = read.table("../data/snps/fkSint.baye_fst.txt",header=T) %>% mutate(loc = rownames(.), out.05 = ifelse(qval < 0.05, 1, 0), out.1 = ifelse(qval < 0.1, 1, 0))
bayescan[bayescan[, 3]<=0.0001, 3] = 0.0001
bayeScEnv = read.table("../data/snps/fkSint.bayeS_fst.txt", header=T) %>% filter(qval_g < 0.05) %>% mutate(loc = rownames(.), depthOut = 1) %>% dplyr::select(loc, depthOut)
bayescan = bayescan %>% left_join(bayeScEnv)
## Joining with `by = join_by(loc)`
bayescan$depthOut = bayescan$depthOut %>% replace_na(0)
sum(bayescan$out.05)
## [1] 591
sum(bayescan$out.1)
## [1] 762
sum(bayescan$depthOut)
## [1] 6
for(i in 1:nrow(bayescan)){
if(bayescan$depthOut[i] == 1){
bayescan$out.05[i] = 2
}
}
bayescanPlotA = ggplot(data = bayescan, aes(x = log10(qval), y = fst, color = as.factor(out.05), alpha = as.factor(out.05))) +
geom_point(size = 1) +
geom_vline(xintercept = log10(0.05), linetype = 2, color = purple) +
xlab(expression(log[10]*"("*italic("q")*"-value)")) +
ylab(expression(italic("F")[ST])) +
scale_x_reverse() +
scale_color_manual(values = c("grey45", purple, teal)) +
scale_alpha_manual(values = c(0.25, 0.25, 0.5)) +
theme_bw()
bayescanPlot = bayescanPlotA +
theme(axis.title.x = element_text(color = "black", size = 12),
axis.text.x = element_text(color = "black", size = 12),
axis.ticks.x = element_line(color = "black"),
axis.title.y = element_text(color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 10),
axis.ticks.y = element_line(color = "black"),
legend.position = "none",
legend.key.size = unit(0.3, 'cm'),
panel.border = element_rect(color = "black"),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# bayescanPlot
ggsave("../figures/figureS2.svg", plot = bayescanPlot, width = 14, height = 8, units = "cm", dpi = 300)
ggsave("../figures/figureS2.png", plot = bayescanPlot, width = 14, height = 8, units = "cm", dpi = 300)
Procrustes analysis using IBS distance matrix and geographic locations
sintIBS = read.table("../data/snps/sintFiltSnps.ibsMat")%>% as.matrix() %>% as.dist(diag = FALSE)
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("sample" = tubeID, "Site" = site, "Depth" = depthZone) %>% mutate(popDepth = paste(Site, Depth))
coords = fknmsSites %>% group_by(site, depthZone, siteID) %>% summarise(latDD = first(latDD), longDD = first(longDD)) %>% filter(siteID %in% c("Ian's Lumps Site 52", "Site 48", "Site 47", "Site 45", "Site 35/36", "Site 37", "Site 39", "Site 19")) %>% dplyr::select(-site) %>% mutate(popDepth = paste(site, depthZone)) %>% droplevels()
## `summarise()` has grouped output by 'site', 'depthZone'. You can override using the
## `.groups` argument.
## Adding missing grouping variables: `site`
coords = coords[-5,]
sintIBD = popData %>% left_join(coords) %>% dplyr::select(-site, -depthZone, -popDepth)
## Joining with `by = join_by(popDepth)`
rownames(sintIBD) = sintIBD$sample
sintProj = st_as_sf(x = sintIBD, coords = c("longDD", "latDD"), crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0") %>% st_transform(crs = 5070)
sintIBD2 = sintIBD %>% dplyr::select(-longDD, -latDD) %>% cbind(st_coordinates(sintProj))
set.seed(694)
ibd = protest(X = sintIBD2[,c(5, 6)], Y = cmdscale(sintIBS), choices = c(1, 2), permutations = 9999, symmetric = FALSE)
ibd
##
## Call:
## protest(X = sintIBD2[, c(5, 6)], Y = cmdscale(sintIBS), permutations = 9999, choices = c(1, 2), symmetric = FALSE)
##
## Procrustes Sum of Squares (m12 squared): 0.9866
## Correlation in a symmetric Procrustes rotation: 0.1156
## Significance: 0.074
##
## Permutation: free
## Number of permutations: 9999
plot(ibd, kind = 2)
plot(ibd, kind = 1)
We can make a plot demonstrating the lack of IBD from the procruestes analysis
ibdPlot = procrustes(X = sintIBD2[,c(5, 6)], Y = cmdscale(sintIBS), choices = c(1, 2), symmetric = FALSE)
sintIBDPlotData = cbind(ibdPlot$X, ibdPlot$Yrot) %>% as.data.frame()
rownames(sintIBDPlotData) = rownames(ibdPlot$X)
colnames(sintIBDPlotData) = c("lon", "lat", "ibs1", "ibs2")
sintIBDPlotData$sample = row.names(sintIBDPlotData)
sintIBDPlotData$sample = gsub(pattern = "\\.2", "", sintIBDPlotData$sample)
sintIBDPlotData = sintIBDPlotData %>% left_join(popData) %>% relocate(sample, .before = lon)
## Joining with `by = join_by(sample)`
sintIBDPlotData$Depth = factor(sintIBDPlotData$Depth)
sintIBDPlotData$Depth = factor(sintIBDPlotData$Depth, levels(sintIBDPlotData$Depth)[c(2,1)])
sintIBDPlotData$Site = factor(sintIBDPlotData$Site)
sintIBDPlotData$Site = factor(sintIBDPlotData$Site, levels(sintIBDPlotData$Site)[c(4, 1, 3, 2)])
head(sintIBDPlotData)
## sample lon lat ibs1 ibs2 Site Depth
## 1 SFK001 -120448.3 -27548.28 2455.9953 7226.742 Tortugas Bank Mesophotic
## 2 SFK002 -120448.3 -27548.28 -1831.4858 4942.716 Tortugas Bank Mesophotic
## 3 SFK003 -120448.3 -27548.28 -1293.2991 4657.087 Tortugas Bank Mesophotic
## 4 SFK004 -120448.3 -27548.28 3945.6627 6165.372 Tortugas Bank Mesophotic
## 5 SFK005 -120448.3 -27548.28 3871.4915 6251.292 Tortugas Bank Mesophotic
## 6 SFK006 -120448.3 -27548.28 -386.5259 7777.978 Tortugas Bank Mesophotic
## popDepth
## 1 Tortugas Bank Mesophotic
## 2 Tortugas Bank Mesophotic
## 3 Tortugas Bank Mesophotic
## 4 Tortugas Bank Mesophotic
## 5 Tortugas Bank Mesophotic
## 6 Tortugas Bank Mesophotic
convertedIBD = sintIBDPlotData %>% mutate(lon = lon+ibdPlot$translation[1], lat = lat+ibdPlot$translation[2], ibs1 = ibs1+ibdPlot$translation[1], ibs2 = ibs2+ibdPlot$translation[2])
sintIBDPlotPops = convertedIBD %>% dplyr::select(Site, Depth, lon, lat) %>% group_by(lon, lat)
#Calculate the angle of rotation, and then the slope of the rotated axis
ibdTheta = acos(ibdPlot$rotation[1,1])
ibdSlope = tan(ibdTheta)
#Calculate the y-intercepts for rotated axes
ibdInt = ibdPlot$translation[2] - (ibdSlope*ibdPlot$translation[1])
ibdInt2 = ibdPlot$translation[2] - (-1/ibdSlope*ibdPlot$translation[1])
florida2 = st_transform(florida, crs = 5070)
sintIBDPlotA = ggplot() +
geom_sf(data = florida2, fill = "white", size = 0.25) +
geom_abline(intercept = ibdInt, slope = ibdSlope, color = "gray75", linetype = 2) +
geom_abline(intercept = ibdInt2, slope = -(1/ibdSlope), color = "gray75", linetype = 2) +
geom_segment(data = convertedIBD, aes(x = ibs1, y = ibs2, xend = lon, yend = lat, color = Site), alpha = 0.5) +
geom_point(data = convertedIBD, aes(x = ibs1, y = ibs2, color = Site), shape = 16, alpha = 0.5)+
geom_point(data = sintIBDPlotPops, aes(x = lon, y = lat, fill = Site, shape = Depth), size = 2) +
annotate(geom = "label", x = 1344053.08266 , y = 368848.804494, label = " ", size = 10) +
annotate(geom = "text", x = 1340417.86738 , y = 372764.041467, label = "Procrustes analysis:", size = 3) +
annotate(geom = "text", x = 1345669.60498, y = 364652.694261, label = "italic(t[0]) == 0.1156*','~italic(p) == 0.074", parse = TRUE, size = 3) +
scale_color_manual(values = flPal) +
scale_fill_manual(values = flPal) +
scale_shape_manual(values = c(24, 25), name = "Depth zone") +
coord_sf(default_crs = sf::st_crs(5070), ylim = c(250000, 375000), xlim = c(1320000, 1600000)) +
annotation_scale(location = "br") +
annotation_north_arrow(location = "br", which_north = "true", style = north_arrow_minimal(), pad_x = unit(-0.25, "cm") , pad_y = unit(0.75, "cm")) +
guides(color = "none", fill = guide_legend(override.aes = list(shape = 15, color = flPal, size = 3), ncol =2, order = 1), shape = guide_legend(ncol = 1)) +
theme_bw()
sintIBDPlot = sintIBDPlotA +
theme(panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 0.75, fill = NA),
axis.title = element_blank(),
axis.ticks = element_line(color = "black"),
axis.text = element_text(color = "black", size = 8),
legend.position = "bottom",
legend.direction = "vertical",
legend.box = "horizontal",
legend.key = element_blank(),
legend.background = element_blank(),
legend.title = element_text(color = "black", size = 8),
legend.text = element_text(color = "black", size = 8)
)
sintIBDPlot
ggsave("../figures/figureS3.png", plot = sintIBDPlot, height = 10, width = 16, units = "cm", dpi = 300)
ggsave("../figures/figureS3.svg", plot = sintIBDPlot, height = 8, width = 14, units = "cm", dpi = 300)
# Set directory for sdmPredictors to download/search for previously downloaded datasets
# Also allow more than 60 seconds to download larger datasets before timing out
options(sdmpredictors_datadir="../data/snps/bioOracle", timeout = max(300, getOption("timeout")))
sintMa = as.dist(read.table("../data/snps/sintFiltSnps.ibsMat"))
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select(Sample, site, depthZone, latDD, longDD, depthM)
datasets = list_datasets(terrestrial = FALSE, marine = TRUE, freshwater = FALSE)
# Extract present-day data sets
present = list_layers(datasets) %>% dplyr::select(dataset_code, layer_code, name, units, description, contains("cellsize"), version) %>% filter(version == 22) %>% filter(layer_code %in% c("BO22_damean", "BO22_parmean", "BO22_ph", "BO22_curvelmax_bdmean", "BO22_salinitymean_bdmean", "BO22_salinitymean_ss", "BO22_curvelmean_ss", "BO22_curvelmean_bdmean", "BO22_dissoxmean_bdmean", "BO22_lightbotmax_bdmean", "BO22_lightbotmean_bdmean", "BO22_nitratemean_bdmean", "BO22_tempmax_bdmean", "BO22_tempmean_bdmean", "BO22_tempmean_ss", "BO22_ppmean_ss", "BO22_dissoxmean_ss", "BO22_ppmean_bdmean", "BO22_chlomean_ss", "BO22_chlomean_bdmean"))
envVar = load_layers(present$layer_code)
envData = data.frame(popData, raster::extract(envVar, popData[,5:4]))
# See what data are highly colinear
corData = rcorr(as.matrix(envData[,c(7:ncol(envData))]), type = "pearson")
corDataFlat = melt(corData$r, value.name = "r")
pDataFlat = melt(corData$P, value.name = "p")
corDataBind = corDataFlat %>% left_join(pDataFlat, by = c("Var1","Var2"))
ggplot(corDataBind) +
geom_tile(aes(x = Var1, y = Var2, fill = r)) +
scale_fill_gradient2(low = "#3B9AB2FF", high = "#F21A00FF") +
geom_text(data = filter(corDataBind, r >= 0.7, p < 0.05),aes(x = Var1, y = Var2, label = round(r, 2))) +
geom_text(data = filter(corDataBind, r <= -0.7, p < 0.05),aes(x = Var1, y = Var2, label = round(r, 2))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
# Removing correlated data and biologically irrelevant measurements
envData2 = envData %>% dplyr::select("BO22_curvelmean_ss", "depthM", "BO22_lightbotmean_bdmean", "BO22_tempmean_bdmean", "BO22_ppmean_bdmean")
# Checking again to make sure we've eliminated all colinearity
corData2 = cor(envData2)
corMelt2 = melt(corData2)
ggplot(corMelt2) +
geom_tile(aes(x = Var1, y = Var2, fill = value)) +
scale_fill_gradient2(low = "#3B9AB2FF", high = "#F21A00FF") +
geom_text(data = corMelt2[corMelt2$value >= 0.7,],aes(x = Var1, y = Var2, label = round(value, 2))) +
geom_text(data = corMelt2[corMelt2$value <= -0.7,],aes(x = Var1, y = Var2, label = round(value, 2))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
Looks good now!
Now we check variance inflation factors(VIF) to see if there is any multi-colinearity (VIF ≥ 10) between explanatory variables
vif = diag(solve(cor(envData2)))
vif
## BO22_curvelmean_ss depthM BO22_lightbotmean_bdmean
## 2.065801 1.664147 3.707624
## BO22_tempmean_bdmean BO22_ppmean_bdmean
## 3.215219 3.433064
No issues with the VIF measures so we can continue to run the redundancy analysis
dbrda0 = dbrda(sintMa ~ 1, data = envData2)
dbrda1 = dbrda(sintMa ~ ., data = envData2)
anova(dbrda1)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = sintMa ~ BO22_curvelmean_ss + depthM + BO22_lightbotmean_bdmean + BO22_tempmean_bdmean + BO22_ppmean_bdmean, data = envData2)
## Df SumOfSqs F Pr(>F)
## Model 5 0.6499 1.8091 0.001 ***
## Residual 214 15.3758
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RsquareAdj(dbrda1)
## $r.squared
## [1] 0.0405542
##
## $adj.r.squared
## [1] 0.01813724
The model explains some of the genetic variation we see. Now we can run forward selection to find the best model that explains the data without over-fitting.
set.seed(092)
bestDbrda = ordiR2step(dbrda0, dbrda1, permutations = how(nperm = 9999), R2permutations = 9999, direction = "forward")
## Step: R2.adj= 0
## Call: sintMa ~ 1
##
## R2.adjusted
## <All variables> 0.01813724460
## + depthM 0.01496351311
## + BO22_lightbotmean_bdmean 0.00236683995
## + BO22_ppmean_bdmean 0.00126355614
## + BO22_tempmean_bdmean 0.00007149916
## <none> 0.00000000000
## + BO22_curvelmean_ss -0.00057351162
##
## Df AIC F Pr(>F)
## + depthM 1 609 4.3268 0.0001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: R2.adj= 0.01496351
## Call: sintMa ~ depthM
##
## R2.adjusted
## <All variables> 0.01813724
## + BO22_ppmean_bdmean 0.01666504
## + BO22_tempmean_bdmean 0.01645891
## + BO22_lightbotmean_bdmean 0.01533888
## <none> 0.01496351
## + BO22_curvelmean_ss 0.01462044
##
## Df AIC F Pr(>F)
## + BO22_ppmean_bdmean 1 609.6 1.3772 0.0156 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: R2.adj= 0.01666504
## Call: sintMa ~ depthM + BO22_ppmean_bdmean
##
## R2.adjusted
## <All variables> 0.01813724
## + BO22_curvelmean_ss 0.01730993
## + BO22_tempmean_bdmean 0.01720985
## <none> 0.01666504
## + BO22_lightbotmean_bdmean 0.01650876
##
## Df AIC F Pr(>F)
## + BO22_curvelmean_ss 1 610.44 1.1424 0.1135
bestDbrda$anova
## R2.adj Df AIC F Pr(>F)
## + depthM 0.014963 1 609.0 4.3268 0.0001 ***
## + BO22_ppmean_bdmean 0.016665 1 609.6 1.3772 0.0156 *
## <All variables> 0.018137
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Depth and Primary productivity are the environmental variables that best explain the variation (although only 2.18%).
model = envData2 %>% dplyr::select("Depth" = depthM, "PP" = BO22_ppmean_bdmean)
sintDbrda = dbrda(sintMa ~ Depth + PP, model)
dbrdaVarPart = varpart(as.dist(sintMa), ~ Depth, ~ PP, data = model)
dbrdaDepth = dbrda(sintMa ~ Depth, data = model)
dbrdaPP = dbrda(sintMa ~ PP, data = model)
dbrdaVarPart
##
## Partition of squared Unknown user-supplied distance in dbRDA
##
## Call: varpart(Y = as.dist(sintMa), X = ~Depth, ~PP, data = model)
##
## Explanatory tables:
## X1: ~Depth
## X2: ~PP
##
## No. of explanatory tables: 2
## Total variation (SS): 16.026
## No. of observations: 220
##
## Partition table:
## Df R.squared Adj.R.squared Testable
## [a+c] = X1 1 0.01946 0.01496 TRUE
## [b+c] = X2 1 0.00582 0.00126 TRUE
## [a+b+c] = X1+X2 2 0.02565 0.01667 TRUE
## Individual fractions
## [a] = X1|X2 1 0.01540 TRUE
## [b] = X2|X1 1 0.00170 TRUE
## [c] 0 -0.00044 FALSE
## [d] = Residuals 0.98333 FALSE
## ---
## Use function 'dbrda' to test significance of fractions of interest
set.seed(003)
anova(sintDbrda)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = sintMa ~ Depth + PP, data = model)
## Df SumOfSqs F Pr(>F)
## Model 2 0.411 2.8557 0.001 ***
## Residual 217 15.615
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(002)
anova.cca(dbrdaDepth)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = sintMa ~ Depth, data = model)
## Df SumOfSqs F Pr(>F)
## Model 1 0.3119 4.3268 0.001 ***
## Residual 218 15.7138
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(001)
anova(dbrdaPP)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = sintMa ~ PP, data = model)
## Df SumOfSqs F Pr(>F)
## Model 1 0.0933 1.2771 0.042 *
## Residual 218 15.9324
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Now create the best model and prepare to plot
sintRdaVar = round(sintDbrda$CA$eig/sum(sintDbrda$CA$eig)*100, 1)
head(sintRdaVar)
## MDS1 MDS2 MDS3 MDS4 MDS5 MDS6
## 7.5 3.5 3.0 0.9 0.9 0.8
sintRdaVarFit = round(sintDbrda$CCA$eig/sum(sintDbrda$CCA$eig)*100, 1)
head(sintRdaVarFit)
## dbRDA1 dbRDA2
## 79.4 20.6
sintI2P = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] %>% dplyr::select("sample" = tubeID, "pop" = site, "depth" = depthZone)
sintI2P$popdepth = paste(sintI2P$pop, sintI2P$depth)
sintRdaPoints = as.data.frame(scores(sintDbrda)$sites)
sintRdaPoints$sample = sintI2P$sample
head(sintRdaPoints)
## dbRDA1 dbRDA2 sample
## V1 -0.7922262 -0.1970590 SFK001
## V2 -0.2260317 -0.6289874 SFK002
## V3 -0.2290753 -0.7679843 SFK003
## V4 -0.6542089 0.3964850 SFK004
## V5 -0.7412977 0.2625075 SFK005
## V6 -0.7770242 -0.5250274 SFK006
sintDbrdaData1 = sintI2P %>% left_join(sintRdaPoints) %>% left_join((pcangsd %>% dplyr::select(sample, "K" = cluster)))
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
head(sintDbrdaData1)
## sample pop depth popdepth dbRDA1 dbRDA2 K
## 1 SFK001 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.7922262 -0.1970590 Blue
## 2 SFK002 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.2260317 -0.6289874 Teal
## 3 SFK003 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.2290753 -0.7679843 Teal
## 4 SFK004 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.6542089 0.3964850 Blue
## 5 SFK005 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.7412977 0.2625075 Blue
## 6 SFK006 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.7770242 -0.5250274 Blue
tail(sintDbrdaData1)
## sample pop depth popdepth dbRDA1 dbRDA2 K
## 215 SFK215 Upper Keys Mesophotic Upper Keys Mesophotic -0.7418902 0.52256042 Blue
## 216 SFK216 Upper Keys Mesophotic Upper Keys Mesophotic -0.5707422 -0.04823706 Admixed
## 217 SFK217 Upper Keys Shallow Upper Keys Shallow -0.5132321 0.85251972 Blue
## 218 SFK218 Upper Keys Shallow Upper Keys Shallow 1.9734859 1.50165360 Green
## 219 SFK219 Upper Keys Shallow Upper Keys Shallow -0.5204182 0.27377460 Blue
## 220 SFK220 Upper Keys Shallow Upper Keys Shallow -0.6783090 0.34770009 Blue
envLoad = as.data.frame(sintDbrda$CCA$biplot)
envLoad$var = row.names(envLoad)
sintDbrdaData = merge(sintDbrdaData1, aggregate(cbind(mean.x = dbRDA1, mean.y = dbRDA2)~popdepth, sintDbrdaData1, mean), by="popdepth") %>% merge(., aggregate(cbind(mean.x.K = dbRDA1, mean.y.K = dbRDA2)~K, sintDbrdaData1, mean), by="K")
sintDbrdaData$depth = factor(sintDbrdaData$depth)
sintDbrdaData$depth = factor(sintDbrdaData$depth, levels(sintDbrdaData$depth)[c(2,1)])
sintDbrdaData$pop = factor(sintDbrdaData$pop)
sintDbrdaData$pop = factor(sintDbrdaData$pop, levels(sintDbrdaData$pop)[c(4, 1, 3, 2)])
head(sintDbrdaData)
## K popdepth sample pop depth dbRDA1
## 1 Admixed Upper Keys Shallow SFK192 Upper Keys Shallow -0.03684665
## 2 Admixed Tortugas Bank Mesophotic SFK033 Tortugas Bank Mesophotic 0.27002133
## 3 Admixed Upper Keys Mesophotic SFK216 Upper Keys Mesophotic -0.57074215
## 4 Blue Lower Keys Mesophotic SFK101 Lower Keys Mesophotic -0.75911010
## 5 Blue Lower Keys Mesophotic SFK102 Lower Keys Mesophotic -0.54508164
## 6 Blue Lower Keys Mesophotic SFK103 Lower Keys Mesophotic -0.67730352
## dbRDA2 mean.x mean.y mean.x.K mean.y.K
## 1 0.34178918 0.4425235 0.82035671 -0.1125225 -0.07736166
## 2 -0.52563711 -0.4813888 -0.31112580 -0.1125225 -0.07736166
## 3 -0.04823706 -0.6884161 0.50329778 -0.1125225 -0.07736166
## 4 0.08288081 -0.6053401 0.07088045 -0.6028628 0.12745589
## 5 0.33183958 -0.6053401 0.07088045 -0.6028628 0.12745589
## 6 -0.05143992 -0.6053401 0.07088045 -0.6028628 0.12745589
Now plot the dbRDA
sintDbrdaPlotA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = sintDbrdaData, aes(x = dbRDA1, y = dbRDA2, fill = K, shape = depth), color = "black", size = 2, alpha = 1) +
scale_shape_manual(values = c(24, 25), name = "Depth Zone") +
geom_segment(data = envLoad, aes(x = 0, y = 0, xend = dbRDA1, yend = dbRDA2), color = "#F5065B", arrow = arrow(length = unit(0.15, "cm"), type = "open"), size = 0.65) +
geom_text(data = envLoad[1,], aes(x = dbRDA1, y = dbRDA2-0.15, label = var), color = "#F5065B", size = 3, fontface = "bold") +
geom_text(data = envLoad[2,], aes(x = dbRDA1-0.1, y = dbRDA2-0.05, label = var), color = "#F5065B", size = 3, fontface = "bold") +
scale_fill_manual(values = kColPal, name = "Lineage") +
scale_color_manual(values = kColPal, name = "Lineage", guide = NULL) +
labs(title = expression(italic("S. intersepta")), x = paste0("dbRDA 1 (", sintRdaVarFit[1], "% [",sintRdaVar[1], "%])"), y = paste0("dbRDA 2 (", sintRdaVarFit[2], "% [",sintRdaVar[2], "%])")) +
guides(shape = guide_legend(override.aes = list(size = 3, stroke = 0.5, alpha = 1), order = 2), fill = guide_legend(override.aes = list(shape = 22, size = 4, alpha = NA, fill = kColPal), order = 1))+
theme_bw()
sintDbrdaPlot = sintDbrdaPlotA +
theme(plot.title = element_text(color = "black", size = 12),
axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.spacing = unit(-5, "pt"),
legend.key.size = unit(5, "pt"),
legend.position = c(1.1495, 0.5),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8),
panel.border = element_rect(color = "black", size = 1),
panel.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# sintDbrdaPlot
Checking deviance among model runs from BayesAss we ran on HPC
fileList = substr(list.files("../data/snps/bayesAss/", "BA3trace.*.txt$"),1,10)
bayesian_deviance <- function(trace, burnin = 0, sampling.interval = 0){
if(burnin == 0) stop('No burnin specified')
if(sampling.interval == 0) stop('No sampling interval specified')
range <- (trace$State > burnin & trace$State %% sampling.interval == 0)
D <- -2*mean(trace$LogProb[range])
return(D)
}
for(i in 1:length(fileList)){
assign(fileList[i], read.delim(paste("../data/snps/bayesAss/", fileList[i], ".txt", sep = ""))) %>% dplyr::select(-last_col())
print(paste(fileList[i], bayesian_deviance(get(fileList[i]), burnin = 3000000, sampling.interval = 100)))
}
## [1] "BA3trace01 6276550.568"
## [1] "BA3trace02 6274555.94971429"
## [1] "BA3trace03 6273601.16057143"
## [1] "BA3trace04 6274629.62228571"
## [1] "BA3trace05 6273869.57542857"
## [1] "BA3trace06 6273118.99771429"
## [1] "BA3trace07 6274232.18228571"
## [1] "BA3trace08 6274603.30514286"
## [1] "BA3trace09 6273023.81885714"
## [1] "BA3trace10 6273319.95257143"
All traces have similar deviance (this is good!). Using the trace with the lowest deviance (BA3trace09.txt, in this case)
bayesAss = read.delim("../data/snps/bayesAss/BA3trace09.txt") %>% filter(State > 3000000) %>% dplyr::select(-State, -LogProb, -X)
baMean = bayesAss %>% summarise(across(everything(), list(mean))) %>% t() %>% as_tibble() %>% rename(., mean=V1) %>% mutate(pops = colnames(bayesAss))
baSumm = bayesAss %>% summarise(across(everything(), list(median))) %>% t() %>% as.data.frame() %>% rename(., median=V1) %>% mutate(pops = baMean$pops, mean = round(baMean$mean, 3)) %>% relocate(median, .after = mean)
baSumm$median = round(baSumm$median, 3)
baHpd =as.data.frame(t(sapply(bayesAss, emp.hpd)))
colnames(baHpd) = c("hpdLow", "hpdHigh")
baHpd$pops = rownames(baHpd)
ESS = as.data.frame(sapply(bayesAss, ESS))
colnames(ESS) = "ESS"
baSumm = baSumm %>% left_join(baHpd)
## Joining with `by = join_by(pops)`
baSumm$hpdLow = round(baSumm$hpdLow, 3)
baSumm$hpdHigh = round(baSumm$hpdHigh, 3)
baSumm$ESS = ESS$ESS
### FROM BAYESASS: ###
## Population Index -> Population Label:
## 0->TortugasBank_Mesophotic 1->TortugasBank_Shallow
## 2->RileysHump_Mesophotic 3->RileysHump_Shallow
## 4->LowerKeys_Mesophotic 5->LowerKeys_Shallow
## 6->UpperKeys_Shallow 7->UpperKeys_Mesophotic
popi = rep(c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow", "Lower Keys\nMesophotic", "Lower Keys\nShallow", "Upper Keys\nShallow", "Upper Keys\nMesophotic"), each = 8)
popj = rep(c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow", "Lower Keys\nMesophotic", "Lower Keys\nShallow", "Upper Keys\nShallow", "Upper Keys\nMesophotic"), times = 8)
baSumm = baSumm %>% mutate(pop.i = popi, pop.j = popj) %>% relocate(c(pop.i, pop.j), .after = pops) %>% dplyr::select(-pops)
baSumm$pop.i = factor(baSumm$pop.i)
baSumm$pop.i = factor(baSumm$pop.i, levels = levels(baSumm$pop.i)[c(8, 2, 6, 4, 7, 1, 5, 3)])
baSumm$pop.j = factor(baSumm$pop.j)
baSumm$pop.j = factor(baSumm$pop.j, levels = levels(baSumm$pop.j)[c(8, 2, 6, 4, 7, 1, 5, 3)])
baSumm$site.i = word(baSumm$pop.i, 1, sep = "\n")
baSumm$site.i = factor(baSumm$site.i)
baSumm$site.i = factor(baSumm$site.i, levels = levels(baSumm$site.i)[c(4, 1, 3, 2)])
baSumm$site.j = word(baSumm$pop.j, 1, sep = "\n")
baSumm$site.j = factor(baSumm$site.j)
baSumm$site.j = factor(baSumm$site.j, levels = levels(baSumm$site.j)[c(4, 1, 3, 2)])
baSumm$depth.i = word(baSumm$pop.i, 2, sep = "\n")
baSumm$depth.i = factor(baSumm$depth.i)
baSumm$depth.i = factor(baSumm$depth.i, levels = levels(baSumm$depth.i)[c(2, 1)])
baSumm$depth.j = word(baSumm$pop.j, 2, sep = "\n")
baSumm$depth.j = factor(baSumm$depth.j)
baSumm$depth.j = factor(baSumm$depth.j, levels = levels(baSumm$depth.j)[c(2, 1)])
#All sites (excluding self retention)
baMeans = baSumm %>% filter(pop.i != pop.j) %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Global")
#mesophotic sources
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Source") %>% bind_rows(baMeans, .)
#shallow sources
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Source") %>% bind_rows(baMeans, .)
#mesophotic sinks
baMeans = baSumm %>% filter(pop.i != pop.j, depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Sink") %>% bind_rows(baMeans, .)
#shallow sinks
baMeans = baSumm %>% filter(pop.i != pop.j, depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Sink") %>% bind_rows(baMeans, .)
#mesophotic -> shallow
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Shallow") %>% bind_rows(baMeans, .)
#mesophotic -> mesophotic
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Mesophotic") %>% bind_rows(baMeans, .)
#shallow -> mesophotic
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Mesophotic") %>% summarise(mean = mean(.$mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow -> Mesophotic") %>% bind_rows(baMeans, .)
#shallow -> shallow
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Shallow") %>% summarise(mean = round(mean(.$mean), 5), sd = round(sd(.$mean), 5), se = round(sd(.$mean)/sqrt(nrow(.)), 3)) %>% mutate(dataset = paste("Shallow -> Shallow")) %>% bind_rows(baMeans, .) %>% relocate(dataset, .before = mean) %>% as.data.frame()
baMeans[,c(2:4)] = baMeans[,c(2:4)] %>% round(4)
baMeans
## dataset mean sd se
## 1 Global 0.0374 0.0473 0.0063
## 2 Mesophotic Source 0.0552 0.0590 0.0111
## 3 Shallow Source 0.0196 0.0204 0.0039
## 4 Mesophotic Sink 0.0350 0.0570 0.0108
## 5 Shallow Sink 0.0399 0.0360 0.0068
## 6 Mesophotic -> Shallow 0.0489 0.0393 0.0098
## 7 Mesophotic -> Mesophotic 0.0637 0.0793 0.0229
## 8 Shallow -> Mesophotic 0.0135 0.0086 0.0022
## 9 Shallow -> Shallow 0.0278 0.0283 0.0080
baMeansTabPub = baMeans %>%
flextable() %>%
flextable::compose(part = "header", j = "dataset", value = as_paragraph("Dataset")) %>%
flextable::compose(part = "header", j = "mean", value = as_paragraph(as_i("m"))) %>%
flextable::compose(part = "header", j = "sd", value = as_paragraph("SD")) %>%
flextable::compose(part = "header", j = "se", value = as_paragraph("SEM")) %>%
flextable::font(fontname = "Times New Roman", part = "all") %>%
flextable::fontsize(size = 10, part = "all") %>%
flextable::bold(part = "header") %>%
flextable::align(align = "left", part = "all") %>%
flextable::autofit()
table3 = read_docx()
table3 = body_add_flextable(table3, value = baMeansTabPub)
print(table3, target = "../tables/table3.docx")
baMeansTabPub
Dataset | m | SD | SEM |
---|---|---|---|
Global | 0.0374 | 0.0473 | 0.0063 |
Mesophotic Source | 0.0552 | 0.0590 | 0.0111 |
Shallow Source | 0.0196 | 0.0204 | 0.0039 |
Mesophotic Sink | 0.0350 | 0.0570 | 0.0108 |
Shallow Sink | 0.0399 | 0.0360 | 0.0068 |
Mesophotic -> Shallow | 0.0489 | 0.0393 | 0.0098 |
Mesophotic -> Mesophotic | 0.0637 | 0.0793 | 0.0229 |
Shallow -> Mesophotic | 0.0135 | 0.0086 | 0.0022 |
Shallow -> Shallow | 0.0278 | 0.0283 | 0.0080 |
baSumm$mean = sprintf('%.3f', baSumm$mean)
baSumm$mean2 = baSumm$mean
baSumm$hpdLow = sprintf('%.3f', baSumm$hpdLow)
baSumm$hpdHigh = sprintf('%.3f', baSumm$hpdHigh)
baLabs = tibble(pop.i = unique(baSumm$pop.i), pop.j = unique(baSumm$pop.j))
migrateA = ggplot(data = baSumm, aes(pop.i, pop.j))+
geom_tile(data = subset(baSumm, subset = baSumm$mean2>0.65), fill = "gray35", color = "white") +
geom_segment(data = baSumm, aes(x = 0.4755, xend = -0.55, y = pop.j, yend = pop.j, color = pop.j), size = 14) +
geom_segment(data = baSumm, aes(x = pop.i, xend = pop.i, y = 0.45, yend = -0.425, color = pop.i), size = 32) +
scale_color_manual(values = flPal[c(1:4, 1:4)], guide = NULL) +
guides(fill = guide_colorbar(ticks.colour = "black", barwidth = 1, barheight = 10, frame.colour = "black")) +
# new_scale("fill") +
geom_tile(data = subset(baSumm, subset = baSumm$mean<0.65), aes(fill = as.numeric(as.character(mean))), color = "white") +
scale_fill_gradientn(colours = paletteer_c("viridis::viridis", n = 10)[3:10], limit = c(0,0.25), space = "Lab", name = expression(paste(italic("m"))), na.value = "transparent", guide = "colourbar", values = c(0, 0.05, 0.1, 0.15, 0.2,0.5,0.75,1)) +
geom_text(data = baSumm, aes(x = pop.i, y = pop.j, label = paste(mean, "\n", sep = "")), color = ifelse(baSumm$mean > 0.6, "white", "gray5"), fontface = ifelse(as.numeric(baSumm$hpdLow)>0, "bold", "plain"), size = ifelse(as.numeric(baSumm$hpdLow)>0, 4.75, 4)) +
geom_text(data = baSumm, aes(x = pop.i, y = pop.j, label = paste("\n(",hpdLow,"–",hpdHigh, ")", sep = "")), color = ifelse(baSumm$mean > 0.6, "white", "gray5"), size = 3.25) +
geom_text(data = (baLabs %>% filter(pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (baLabs %>% filter(!pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#000000", family = "sans") +
geom_text(data = (baLabs %>% filter(pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (baLabs %>% filter(!pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#000000", family = "sans") +
labs(x = "Sink", y = "Source") +
scale_y_discrete(limits = rev(levels(baSumm$pop.i))[c(1:8)], position = "left") +
coord_cartesian(xlim = c(1, 8), ylim = c(1, 8), clip = "off") +
theme_minimal()
migrate = migrateA + theme(
axis.text.x = element_text(vjust = 1, size = 12, hjust = 0.5, color = NA),
axis.text.y = element_text(size = 10, color = NA),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
panel.grid.major = element_blank(),
axis.ticks = element_blank(),
# legend.position = c(1.055, 0.5),
legend.direction = "vertical",
legend.title = element_text(size = 12, face = "bold")
)
migrate
baSumm$mean = as.numeric(baSumm$mean)
baSumm$hpdLow = as.numeric(baSumm$hpdLow)
baSumm$hpdHigh = as.numeric(baSumm$hpdHigh)
baSummSelf = baSumm %>% filter(pop.i == pop.j) %>% mutate(popdepth = paste(site.i, depth.i)) %>% mutate(retention = mean) %>% dplyr::select(-mean)
fknmsPopsMigrate2 = fknmsSites %>% group_by(site, depthZone, siteID) %>% dplyr::summarise(latDD = first(latDD), longDD = first(longDD)) %>% dplyr::filter(siteID %in% c("Ian's Lumps Site 52", "Site 48", "Site 47", "Site 45", "Site 35/36", "Site 37", "Site 39", "Site 19")) %>% dplyr::select(-site) %>% droplevels() %>% mutate(popdepth = paste(site, depthZone)) %>% as.data.frame() %>% slice(-5) %>% left_join(dplyr::select(baSummSelf, popdepth, retention))
## `summarise()` has grouped output by 'site', 'depthZone'. You can override using the
## `.groups` argument.
## Adding missing grouping variables: `site`
## Joining with `by = join_by(popdepth)`
fknmsPopsMigrate = fknmsPopsMigrate2[c(1,2,4,3,5:8),]
migratePal = c("Upper Keys" = flPal[1], "Lower Keys" = flPal[2], "Tortugas Bank" = flPal[3], "Riley's Hump" = flPal[4])
lines = c("Shallow" = 5, "Mesophotic" = 1)
baMapData = dplyr::select(baSumm, -mean2) %>% left_join(dplyr::select(fknmsPopsMigrate,-retention,-popdepth), by = c("site.i" = "site", "depth.i" = "depthZone")) %>% left_join(dplyr::select(fknmsPopsMigrate,-retention,-popdepth),, by = c("site.j" = "site", "depth.j" = "depthZone"), suffix = c(".i", ".j")) %>% filter(mean >= 0.02)
for(x in 1:nrow(baMapData)) {
if (baMapData$pop.i[x] == baMapData$pop.j[x]) {
baMapData$latDD.i[x] = NA;
baMapData$latDD.j[x] = NA;
baMapData$longDD.i[x] = NA;
baMapData$longDD.j[x] = NA;
baMapData$mean[x] = NA;
baMapData$median[x] = NA
}
}
migrateMap = ggplot() +
geom_sf(data = florida, fill = "white", size = 0.25) +
#SHALLOW SOURCES
geom_curve(data = baMapData[2,], aes(x = longDD.j, y = latDD.j, xend = longDD.i, yend = latDD.i-0.03, color = site.j, linetype = depth.j, size = mean), alpha = 0.7, arrow = arrow(type = "open", length = unit(0.03, "npc")), curvature = -3) +
geom_curve(data = baMapData[7,], aes(x = longDD.j, y = latDD.j, xend = longDD.i+0.03, yend = latDD.i, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = 0.25, na.rm = TRUE) +
geom_curve(data = baMapData[11,], aes(x = longDD.j, y = latDD.j, xend = longDD.i+0.02, yend = latDD.i-0.02, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = 0.2, na.rm = TRUE) +
geom_curve(data = baMapData[13,], aes(x = longDD.j, y = latDD.j, xend = longDD.i-0.04, yend = latDD.i+0.01, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = 1.25, na.rm = TRUE) +
geom_curve(data = baMapData[16,], aes(x = longDD.j, y = latDD.j, xend = longDD.i +0.03, yend = latDD.i, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.25, na.rm = TRUE) +
geom_curve(data = baMapData[20,], aes(x = longDD.j, y = latDD.j, xend = longDD.i-0.04, yend = latDD.i+0.02, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.2, na.rm = TRUE) +
geom_curve(data = baMapData[25,], aes(x = longDD.j, y = latDD.j, xend = longDD.i-0.05, yend = latDD.i, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.2, na.rm = TRUE) +
#MESO SOURCES
geom_curve(data = baMapData[4,], aes(x = longDD.j, y = latDD.j, xend = longDD.i, yend = latDD.i+0.03, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -3, na.rm = TRUE) +
geom_curve(data = baMapData[c(3, 6),], aes(x = longDD.j, y = latDD.j, xend = longDD.i+0.02, yend = latDD.i-0.02, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.2, na.rm = TRUE) +
geom_curve(data = baMapData[8,], aes(x = longDD.j, y = latDD.j, xend = longDD.i, yend = latDD.i+0.03, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = 0.2, na.rm = TRUE) +
geom_curve(data = baMapData[10,], aes(x = longDD.j, y = latDD.j, xend = longDD.i +0.03, yend = latDD.i, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.25, na.rm = TRUE) +
geom_curve(data = baMapData[12,], aes(x = longDD.j, y = latDD.j, xend = longDD.i+0.02, yend = latDD.i-0.04, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.35, na.rm = TRUE) +
geom_curve(data = baMapData[15,], aes(x = longDD.j, y = latDD.j, xend = longDD.i+0.02, yend = latDD.i+0.02, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.2, na.rm = TRUE) +
geom_curve(data = baMapData[17,], aes(x = longDD.j, y = latDD.j, xend = longDD.i+0.03, yend = latDD.i, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -.35, na.rm = TRUE) +
geom_curve(data = baMapData[19,], aes(x = longDD.j, y = latDD.j, xend = longDD.i-0.04, yend = latDD.i+0.02, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.2, na.rm = TRUE) +
geom_curve(data = baMapData[21,], aes(x = longDD.j, y = latDD.j, xend = longDD.i, yend = latDD.i-0.02, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -9, na.rm = TRUE) +
geom_curve(data = baMapData[27,], aes(x = longDD.j, y = latDD.j, xend = longDD.i+0.03, yend = latDD.i, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = 2, na.rm = TRUE) +
geom_curve(data = baMapData[c(24, 29),], aes(x = longDD.j, y = latDD.j, xend = longDD.i-0.03, yend = latDD.i, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = 0.2, na.rm = TRUE) +
geom_curve(data = baMapData[c(23, 28),], aes(x = longDD.j, y = latDD.j, xend = longDD.i-0.04, yend = latDD.i+0.01, color = site.j, linetype = depth.j, size = mean), arrow = arrow(type = "open", length = unit(0.03, "npc")), alpha = 0.7, curvature = -0.2, na.rm = TRUE) +
scale_fill_manual(values = flPal, name = "Source site") +
scale_color_manual(values = migratePal, guide = NULL) +
scale_shape_manual(values = c(24, 25), name = "Depth") +
scale_size(range = c(0.5, 2), breaks = c(0.02,0.06,0.1,0.14,0.18,0.22),name = expression(paste("Migration (", italic("m"), ")", sep = "")), guide = guide_legend(ncol = 1, order = 5)) +
geom_point(data = fknmsPopsMigrate, aes(x = longDD, y = latDD, fill = site, shape = depthZone), size = 3.5) +
scale_linetype_manual(values = lines, name = "Source depth") +
coord_sf(xlim = c(-83.1, -80.25), ylim = c(24.3, 25.3)) +
scale_x_continuous(breaks = c(seq(-84, -80, by = .5))) +
scale_y_continuous(breaks = c(seq(24, 26, by = .2))) +
annotation_scale(location = "br") +
annotation_north_arrow(location = "br", which_north = "true", style = north_arrow_minimal(), pad_x = unit(-0.25, "cm") , pad_y = unit(0.75, "cm")) +
guides(fill = guide_legend(override.aes = list(shape = 22, color = NA, size = 4),ncol = 1, order = 1, reverse = TRUE), shape = guide_legend(override.aes = list(size = 3), order = 2)) +
theme_bw() +
theme(panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 0.75, fill = NA),
plot.background = element_blank(),
axis.title = element_blank(),
axis.ticks = element_line(color = "black"),
axis.text = element_text(color = "black"),
plot.title = element_blank(),
legend.key.size = unit(15, "pt"),
legend.spacing = unit(-5, "pt"),
legend.position = "right",
legend.direction = "vertical",
legend.box = "vertical",
legend.background = element_blank()
)
migrateMap
Putting the plots together into a single figure panel
migrationPlots = (migrate/(migrateMap) + theme(legend.box.margin = margin(-20, 0, 0, 0))) + plot_layout(heights = c(1, 1.1)) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = 20))
ggsave("../figures/figure5.png", plot = migrationPlots, width = 28, height = 24, units = "cm", dpi = 300)
ggsave("../figures/figure5.svg", plot = migrationPlots, width = 28, height = 25, units = "cm", dpi = 300)
Now let’s examine algal symbiont communities with the results of SymPortal analysis of Symbiodiniaceae ITS2 sequences.
How many raw reads?
rawItsReads = read.delim("../data/ITS2/sintItsReadCounts", header = FALSE)
colnames(rawItsReads) = c("sample", "reads")
rawItsReads$sample = gsub("_S.*", "", rawItsReads$sample)
rawItsReads = rawItsReads %>% group_by(sample) %>% summarise(reads = first(reads))
head(rawItsReads)
## # A tibble: 6 × 2
## sample reads
## <chr> <int>
## 1 SFK001 34806
## 2 SFK002 38328
## 3 SFK003 18122
## 4 SFK004 52311
## 5 SFK005 30791
## 6 SFK006 40545
#total reads
sum(rawItsReads$reads)
## [1] 7385411
#average reads/sample
(sum(rawItsReads$reads)/nrow(rawItsReads))
## [1] 33723.34
its2Seqs = read.delim("../data/ITS2/148_20210301_DBV_20210401T112728.seqs.absolute.abund_CLEAN.txt", header = TRUE)
its2Profs = read.csv("../data/ITS2/148_20210301_DBV_20210401T112728.profiles.absolute.abund_CLEAN.csv", header = TRUE, check.names = FALSE)
head(its2Seqs)
## Sample Symbiodinium A3 A3b A3at A3ax X43947_A X34778_A X495083_A X36534_A X34149_A
## 1 SFK115 0 12 0 0 0 0 0 0 0 0
## 2 SFK022 0 7 0 0 0 0 0 0 0 0
## 3 SFK025 28 242 7 7 0 6 0 0 7 0
## 4 SFK095 0 14 0 0 0 0 0 0 0 0
## 5 SFK170 0 26 0 0 0 0 0 0 0 0
## 6 SFK175 0 5 0 0 0 0 0 0 0 0
## X50854_A A3av A3s A3q X33981_A X1402229_A A3au A3aw X50835_A X34175_A X33953_A A3r
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 17 23 5 0 0 0 0 0 0 6
## 4 0 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0 0
## X1402205_A X364481_A X363583_A A4 X1402230_A X50833_A X50842_A X363143_A X72388_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 10 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 7 0 0 0 0 0
## X363606_A X797686_A X22386_A X529468_A X34696_A X363636_A X1402231_A X34151_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X364459_A X363570_A X363645_A X363578_A X1402232_A X364267_A X363625_A X50845_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X363142_A X1402267_A X22415_A X363639_A X905679_A X363598_A X363706_A X363685_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X1402206_A X1402233_A X1402235_A X43753_A X500385_A X1402234_A A3d X1402202_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X1402236_A X364620_A X363593_A X367833_A X22400_A X50850_A X22463_A X363687_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X693524_A X37988_A X66961_A X22436_A X363590_A X22426_A X45527_A A6b X36953_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X363563_A X37985_A X693526_A X22451_A X33927_A X1402203_A A4.3 X35200_A X22392_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X65140_A X1402245_A X364567_A X364639_A X1402216_A X22464_A X22429_A X49905_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X49571_A X29211_A X36825_A X364218_A X1402237_A X363617_A X73521_A X364172_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X363654_A X1402274_A X49906_A X37990_A X50843_A X363674_A X363646_A X33878_A
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X1402268_A X1402282_A X22444_A X373280_A X1402238_A X366219_A X69439_A Breviolum B5
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## B18c B18b X1402208_B X1402209_B X45548_B X1402210_B X43411_B X37534_B X1402211_B
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X1402212_B X1402213_B X1160454_B X1402214_B X1402215_B X37591_B B5ai X1402254_B
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X71511_B X1402256_B X71527_B X1402255_B X1402257_B X71517_B X71509_B X71508_B
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X1402258_B X71518_B X71510_B X1402259_B X368876_B X1402260_B X50427_B X1402262_B
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X71525_B X71523_B X71515_B X368004_B X1402261_B X71526_B X71524_B X1402266_B
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X1402265_B X1402264_B X1402263_B X43545_B X1402252_B X900132_B X1390171_B X1402253_B
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## B1 X1402276_B X1402280_B X38112_B Cladocopium C3 C1 C16 C3go C3.10 C42.2 C1dl C3gm
## 1 0 0 0 0 761 10037 0 0 0 123 0 0 0
## 2 0 0 0 0 1273 15896 0 0 0 260 0 0 0
## 3 0 0 0 0 661 11333 0 0 0 115 0 0 0
## 4 0 0 0 0 1019 13663 0 0 0 717 0 0 0
## 5 0 0 0 0 1077 12007 0 0 0 619 0 0 0
## 6 0 0 0 0 1975 21478 0 0 0 385 0 0 110
## C3gl C3hb C3gr C16b X110271_C X334025_C C3gk C1dk X22330_C X11408_C X18596_C X21897_C
## 1 0 0 0 0 0 0 0 0 58 0 0 90
## 2 0 0 0 0 164 158 0 0 150 0 0 157
## 3 0 0 0 0 67 68 0 0 85 0 0 91
## 4 0 0 0 0 0 0 0 0 0 0 0 227
## 5 0 0 0 0 102 120 0 0 83 0 0 93
## 6 0 0 0 0 282 260 0 0 210 0 0 274
## C6c C3gq C3gp X65808_C C3gn C15hx C3dw C1cy X1402187_C X20795_C C93.1 X65703_C
## 1 0 0 0 54 0 0 0 0 181 0 0 144
## 2 0 0 0 0 0 0 0 0 120 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0 68
## 4 0 0 0 88 0 0 0 0 133 0 229 96
## 5 0 0 0 106 0 0 0 0 131 0 81 120
## 6 0 0 0 166 0 0 0 0 303 0 0 0
## X385070_C C3ge X1402188_C X1372_C X3238_C X95094_C C3hc X24879_C X91373_C X3699_C
## 1 0 0 107 0 0 0 0 0 0 0
## 2 0 0 93 0 135 0 0 188 0 108
## 3 0 0 76 0 0 0 0 100 0 0
## 4 0 0 120 185 0 0 0 0 0 0
## 5 0 0 89 82 0 0 0 0 0 0
## 6 0 0 128 0 0 0 0 120 0 0
## C3gt C3dz X20934_C C1af C3gs X25557_C X40208_C X470358_C X40209_C X1402193_C X2239_C
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 170 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 232 0 0 0 0 0 0 0 0
## X1402195_C C16a X17495_C X17534_C X2097_C X40211_C X93722_C C1v X40207_C X40212_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 55 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X1402196_C X1402197_C X9944_C X1402198_C X1402219_C X470998_C X54162_C X22574_C
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 94 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X20921_C X33343_C X1402200_C X25492_C X1402218_C X3240_C X2037_C X85729_C X5371_C
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 51 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 57 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X1402225_C X909389_C X1402207_C C3ag X2428_C X1402220_C X4062_C X103828_C X1402199_C
## 1 0 0 0 0 0 0 0 127 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X1398518_C X90670_C X1402204_C X1402227_C X1402248_C C6b X1402247_C X1402194_C X870_C
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X71029_C C3ga X91285_C X1402192_C X1402244_C C1bz X18746_C X1402228_C X694_C C3i
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X1402250_C X1402243_C C21 X1402281_C C3ca X9108_C X11201_C X11191_C X7821_C
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X1402273_C C3ck X1402240_C X1402249_C X99988_C X1356_C X69324_C X24193_C C3bb C40f
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X1401572_C X47282_C X16815_C X5726_C X1402270_C X1402221_C C1ap X1402275_C X21673_C
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X69758_C C1bt X1402269_C X2943_C C70 X1402271_C X42218_C X1402277_C X9807_C C1ai C3t
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## X54249_C X26258_C X1402278_C X1402272_C C1x X40218_C X21804_C X1402279_C C3de
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X13929_C X23354_C X1402223_C X99010_C X983542_C X3366_C X1402226_C X1402222_C
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X42529_C X2152_C X62532_C X4558_C X2427_C X1829_C C3fo X18793_C X11809_C X31248_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X1402241_C X816_C X921460_C C3ao C3an C3cn X3241_C X103581_C X21093_C X1402224_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X18813_C X1402201_C X22869_C X23865_C C6a C1j X1402239_C X3434_C X22178_C X17016_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X1402242_C X42518_C X54160_C X873_C C50f X1402246_C X1390080_C X113247_C X99987_C
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X3601_C X1866_C X1402251_C X2895_C X9153_C C3fn X866_C X864_C X18159_C X990_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X1402189_C X21205_C C3fc X871_C X1402190_C X8117_C X55844_C X54218_C X51874_C
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X874099_C X27927_C C65b X46049_C X37410_C X28411_C D1 G3l G3b
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
sum(its2Seqs[,c(2:ncol(its2Seqs))])
## [1] 3280586
sum(its2Profs[,c(2:ncol(its2Profs))])
## [1] 2728837
its2SeqsGen = its2Seqs %>% rowwise() %>% summarise(sample = Sample, symbiodinium = sum(c_across(2:121)), breviolum = sum(c_across(122:176)), cladocopium = sum(c_across(177:405)), durusdinium = sum(c_across(406)), gerakladium = sum(c_across(406:407)))
round(sum(its2SeqsGen$symbiodinium)/sum(its2SeqsGen[,-1])*100, 2)
## [1] 19.67
round(sum(its2SeqsGen$breviolum)/sum(its2SeqsGen[,-1])*100, 2)
## [1] 0.29
round(sum(its2SeqsGen$cladocopium)/sum(its2SeqsGen[,-1])*100, 2)
## [1] 80.03
round(sum(its2SeqsGen$durusdinium)/sum(its2SeqsGen[,-1])*100, 4)
## [1] 0.0002
round(sum(its2SeqsGen$gerakladium)/sum(its2SeqsGen[,-1])*100, 4)
## [1] 0.002
its2ProfsGen = its2Profs %>% rowwise() %>% summarise(sample = Sample, symbiodinium = sum(c_across(2:7)), breviolum = sum(c_across(8:10)), cladocopium = sum(c_across(11:20)))
round(sum(its2ProfsGen$symbiodinium)/sum(its2ProfsGen[,-1])*100, 2)
## [1] 21.33
round(sum(its2ProfsGen$breviolum)/sum(its2ProfsGen[,-1])*100, 2)
## [1] 0.22
round(sum(its2ProfsGen$cladocopium)/sum(its2ProfsGen[,-1])*100, 2)
## [1] 78.45
Read in SymPortal outputs for ITS2 type profiles
stephanocoeniaMetaData = read.csv("../data/stephanocoeniaMetaData.csv", header = TRUE, check.names = FALSE)[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select(c(Sample = tubeID, site, depthM, depthZone))
its2Profs = read.csv("../data/ITS2/148_20210301_DBV_20210401T112728.profiles.absolute.abund_CLEAN.csv", header = TRUE, check.names = FALSE)
its2Profs = stephanocoeniaMetaData %>% right_join(its2Profs) %>% arrange(Sample)
## Joining with `by = join_by(Sample)`
its2Profs$site = factor(its2Profs$site)
its2Profs$site = factor(its2Profs$site, levels(its2Profs$site)[c( 2, 3, 1, 4)])
its2Profs$depthZone = factor(its2Profs$depthZone)
its2Profs$depthZone = factor(its2Profs$depthZone, levels(its2Profs$depthZone)[c(2, 1)])
its2Profs = its2Profs %>% arrange(site, depthZone, desc(`C3/C3.10`), desc(`C1/C3-C42.2-C1dl-C3gl-C3gm-C3gk`), desc(`C3-C1-C3.10`), desc(`C3-C1dk-C15hx`), desc(`C3-C3go-C6c-C3gq-C3gp-C3gn-C3dw`), desc(`C16/C3-C16b`), desc(`C3-C3hb-C3ge-C3hc-C1dk`), desc(`C3-C3gr-C3gt-C3gs-C3.10`), desc(`C3/C1`),desc(`A3-A3b-A3at-A3ax`), desc(`A3-A3at-A3b-A3q-A3s`), desc(`A3-A3s-A3q`), desc(`A3`), desc(`A3-A3b-A3av-A3au-A3aw`),desc(`A4`), desc(`C3`), desc(`B18b`), desc(`B18c`), desc(`B5`))
sampleCounts = plyr::count(its2Profs, c('site','depthZone'))
meltedList = reshape2::melt(lapply(sampleCounts$freq,function(x){c(1:x)}))
its2Profs$barPlotOrder = meltedList$value
its2Profs = its2Profs[c(1,ncol(its2Profs),2:(ncol(its2Profs)-1))]
head(its2Profs)
its2ProfsPerc = its2Profs
its2ProfsPerc$sum = apply(its2ProfsPerc[, c(6:length(its2ProfsPerc[1,]))], 1, function(x) {
sum(x, na.rm = T)
})
its2ProfsPerc = cbind(its2ProfsPerc[, c(1:5)], (its2ProfsPerc[, c(6:(ncol(its2ProfsPerc)-1))]
/ its2ProfsPerc$sum))
head(its2ProfsPerc)
## Sample barPlotOrder site depthM depthZone A3-A3b-A3at-A3ax
## 1 SFK068 1 Riley's Hump 27.4 Shallow 0
## 2 SFK091 2 Riley's Hump 26.2 Shallow 0
## 3 SFK073 3 Riley's Hump 26.2 Shallow 0
## 4 SFK084 4 Riley's Hump 26.2 Shallow 0
## 5 SFK083 5 Riley's Hump 26.2 Shallow 0
## 6 SFK082 6 Riley's Hump 26.5 Shallow 0
## A3-A3at-A3b-A3q-A3s A3-A3s-A3q A3 A3-A3b-A3av-A3au-A3aw A4 B18b B18c B5 C3/C3.10
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## C1/C3-C42.2-C1dl-C3gl-C3gm-C3gk C3-C1-C3.10 C3-C1dk-C15hx
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## C3-C3go-C6c-C3gq-C3gp-C3gn-C3dw C16/C3-C16b C3-C3hb-C3ge-C3hc-C1dk
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## C3-C3gr-C3gt-C3gs-C3.10 C3/C1 C3
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
# check that all proportions add up to 1
apply(its2ProfsPerc[, c(6:(ncol(its2ProfsPerc)))], 1, function(x) {
sum(x, na.rm = T)
})
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [206] 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Everything looks good and is ready to plot
gssProf = otuStack(its2ProfsPerc, count.columns = c(6:length(its2ProfsPerc[1, ])),
condition.columns = c(1:5)) %>% filter(otu != "summ") %>% droplevels() # remove summ rows
levels(gssProf$otu)
levels(gssProf$depthZone)
levels(gssProf$site)
zooxAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5),
y1 = -0.12, y2 = -0.12, site = c("Riley's Hump", "Tortugas Bank", "Lower Keys", "Upper Keys"))
zooxAnno$site = factor(zooxAnno$site)
zooxAnno$site = factor(zooxAnno$site, levels = levels(zooxAnno$site)[c(2, 3, 1, 4)])
gssProfPlot = gssProf %>% left_join(zooxAnno, by = "site")
its2ProfsPlotA = ggplot(gssProfPlot, aes(x = barPlotOrder, y = count, fill = otu)) +
geom_bar(position = "stack", stat = "identity", color = "gray25", size = 0.25) +
scale_fill_manual(values = profPal, name = expression(paste(italic("ITS2"), " type profile"))) +
scale_color_manual(values = rev(flPal)) +
geom_segment(data = gssProfPlot %>% filter(depthZone == "Mesophotic"), aes(x = x1, xend = x2, y = y1, yend = y2, color = site), linewidth = 7) +
geom_text(data = (gssProfPlot %>% filter(depthZone == "Mesophotic", site %in% c("Riley's Hump", "Tortugas Bank"), Sample %in% c("SFK001", "SFK100"), otu == "A4")), x = 15.5, y = -.1, aes(label = site), size = 4.4, color = "#FFFFFF") +
geom_text(data = (gssProfPlot %>% filter(depthZone == "Mesophotic", site %in% c("Lower Keys", "Upper Keys"), Sample %in% c("SFK101", "SFK201"), otu == "A4")), x = 15.5, y = -.1, aes(label = site), size = 4.4, color = "#000000") +
labs(title = expression(italic("SymPortal")), fill = expression(paste(italic("ITS2"), " type profile"))) +
guides(color = "none", fill = guide_legend(ncol = 3, reverse = FALSE)) +
facet_grid(factor(depthZone) ~ site, scales = "free", switch = "both", space = "free") + # faceting plots by Depth and Site
coord_cartesian(ylim = c(0, 1), xlim = c(0.5, 30.5), clip = "off") +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
theme_bw()
its2ProfsPlot = its2ProfsPlotA +
theme(plot.title = element_text(),
panel.grid = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA, color = "black", size = 0.75, linetype = "solid"),
plot.background = element_blank(),
legend.background = element_blank(),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 12),
strip.text.y.left = element_text(angle = 90),
strip.text.x.bottom = element_text(vjust = .75, color = NA),
legend.key.size = unit(0.75, "line"),
legend.key = element_blank(),
legend.title = element_text(size = 10, angle = 90),
legend.text = element_text(size = 8),
legend.position = "bottom")
# its2ProfsPlot
Pulling genera of Symbiodiniaceae from SNPS and comparing to genera
of ITS2 profiles from SymPortal
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("Sample" = tubeID, "site" = site, "depth" = depthZone)
zoox = read.delim("../data/snps/symbionts/zooxReads", header = FALSE, check.names = FALSE)
head(zoox)
## V1 V2
## 1 fk_S001.trim.bt2.bam NA
## 2 chr1 103
## 3 chr2 95
## 4 chr3 135
## 5 chr4 123
## 6 chr5 11
#Reconstruct read mapping output into dataframe usable for analysis
zoox$V2[is.na(zoox$V2)] <- as.character(zoox$V1[is.na(zoox$V2)])
zoox$V1 = gsub(pattern = "fk_*", "chr", zoox$V1)
zoox$V2 = gsub(".trim.*", "", zoox$V2)
zoox = zoox %>% filter(zoox$V1 != "*")
zooxLst = split(zoox$V2, as.integer(gl(length(zoox$V2), 20, length(zoox$V2))))
zooxMaps = NULL
for(i in zooxLst){
zooxMaps = rbind(zooxMaps, data.frame(t(i)))
}
#remove tech reps
zooxMaps = zooxMaps[-c(66, 68, 164, 166, 209, 211),]
#rename columns and samples to match other ITS2 dataframe
zooxMaps$X1 = gsub("fk_S", "SFK", zooxMaps$X1)
zooxMaps$X1 = gsub("\\.[1-3]", "", zooxMaps$X1)
colnames(zooxMaps) = c("Sample",zoox$V1[c(2:20)])
#convert characters to numeric
str(zooxMaps)
## 'data.frame': 220 obs. of 20 variables:
## $ Sample: chr "SFK001" "SFK002" "SFK003" "SFK004" ...
## $ chr1 : chr "103" "86" "57" "970" ...
## $ chr2 : chr "95" "138" "63" "1096" ...
## $ chr3 : chr "135" "155" "78" "1471" ...
## $ chr4 : chr "123" "182" "34" "1394" ...
## $ chr5 : chr "11" "15" "5" "26" ...
## $ chr6 : chr "75" "72" "110" "42" ...
## $ chr7 : chr "93" "134" "123" "43" ...
## $ chr8 : chr "148" "230" "125" "100" ...
## $ chr9 : chr "98" "90" "78" "39" ...
## $ chr10 : chr "3131" "3973" "3664" "2855" ...
## $ chr11 : chr "4468" "5327" "5232" "3649" ...
## $ chr12 : chr "4696" "5532" "5511" "3902" ...
## $ chr13 : chr "4360" "5070" "5102" "3546" ...
## $ chr14 : chr "3750" "4587" "4354" "3294" ...
## $ chr15 : chr "549" "590" "554" "458" ...
## $ chr16 : chr "35" "121" "24" "87" ...
## $ chr17 : chr "34" "70" "25" "38" ...
## $ chr18 : chr "25" "61" "21" "34" ...
## $ chr19 : chr "3" "6" "1" "11" ...
for(i in c(2:20)){
zooxMaps[,i] = as.numeric(zooxMaps[,i])
}
str(zooxMaps)
## 'data.frame': 220 obs. of 20 variables:
## $ Sample: chr "SFK001" "SFK002" "SFK003" "SFK004" ...
## $ chr1 : num 103 86 57 970 73 3600 31 52 91 46 ...
## $ chr2 : num 95 138 63 1096 89 ...
## $ chr3 : num 135 155 78 1471 121 ...
## $ chr4 : num 123 182 34 1394 79 ...
## $ chr5 : num 11 15 5 26 7 118 15 14 12 14 ...
## $ chr6 : num 75 72 110 42 52 72 58 78 99 76 ...
## $ chr7 : num 93 134 123 43 76 76 52 143 166 126 ...
## $ chr8 : num 148 230 125 100 118 124 99 185 215 191 ...
## $ chr9 : num 98 90 78 39 50 58 37 107 105 73 ...
## $ chr10 : num 3131 3973 3664 2855 1518 ...
## $ chr11 : num 4468 5327 5232 3649 1877 ...
## $ chr12 : num 4696 5532 5511 3902 2097 ...
## $ chr13 : num 4360 5070 5102 3546 1802 ...
## $ chr14 : num 3750 4587 4354 3294 1783 ...
## $ chr15 : num 549 590 554 458 255 200 500 823 589 865 ...
## $ chr16 : num 35 121 24 87 79 99 26 27 78 36 ...
## $ chr17 : num 34 70 25 38 42 57 30 51 58 50 ...
## $ chr18 : num 25 61 21 34 54 59 17 24 44 39 ...
## $ chr19 : num 3 6 1 11 2 16 3 0 0 18 ...
#collapse fake chromosomes into representativ genera
zooxMaps$Symbiodinium = rowSums(zooxMaps[2:6])
zooxMaps$Breviolum = rowSums(zooxMaps[7:10])
zooxMaps$Cladocopium = rowSums(zooxMaps[11:16])
zooxMaps$Durusdinium = rowSums(zooxMaps[17:20])
#keep genera totals and turn into proportions for barplot
zooxMaps = zooxMaps[,c(1, 21:24)]
zooxProp = zooxMaps
zooxProp$sum = apply(zooxProp[, c(2:length(zooxProp[1,]))], 1, function(x) {
sum(x, na.rm = T)
})
zooxProp = cbind(zooxProp$Sample, (zooxProp[, c(2:(ncol(zooxProp)-1))]
/ zooxProp$sum))
colnames(zooxProp)[1] = "Sample"
head(zooxProp)
## Sample Symbiodinium Breviolum Cladocopium Durusdinium
## 1 SFK001 0.021293088 0.018876527 0.9554076 0.004422761
## 2 SFK002 0.021785998 0.019894852 0.9485608 0.009758312
## 3 SFK003 0.009419339 0.017328405 0.9704304 0.002821827
## 4 SFK004 0.215007591 0.009715897 0.7679028 0.007373672
## 5 SFK005 0.036268921 0.029093768 0.9172400 0.017397287
## 6 SFK006 0.704310476 0.012543713 0.2743652 0.008780599
#Check that all samples total to 1
apply(zooxProp[, c(2:(ncol(zooxProp)))], 1, function(x) {
sum(x, na.rm = T)
})
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 67
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 157 158 159 160 161 162 163 165 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 203 204 205 206 207 208 210 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#add sample metadata to proportions
snpSym = popData %>% left_join(zooxProp)
## Joining with `by = join_by(Sample)`
Combining SNP and ITS2 data for comparison of Symbiodiniaceae genera This will allow us to plot individuals in the same order across methods
#sum profiles into genera
symGenera = its2Profs
symGenera$itsSymbiodinium = rowSums(symGenera[6:11])
symGenera$itsBreviolum = rowSums(symGenera[12:14])
symGenera$itsCladocopium = rowSums(symGenera[15:24])
symGenera$itsDurusdinium = 0
symGenera = symGenera %>% dplyr::select(Sample, barPlotOrder, itsSymbiodinium, itsBreviolum, itsCladocopium, itsDurusdinium)
#convert to proportions
symGenera$sum = apply(symGenera[, c(3:length(symGenera[1,]))], 1, function(x) {
sum(x, na.rm = T)
})
symGeneraProp = cbind(symGenera$Sample, symGenera[, c(3:(ncol(symGenera)-1))]
/ symGenera$sum)
colnames(symGeneraProp)[1] = "Sample"
#Check that all samples total to 1
apply(symGeneraProp[,c(2:5)], 1, function(x) {
sum(x, na.rm = T)
})
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [206] 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#construct combined dataframe
symGenera = symGenera %>% dplyr::select(Sample, barPlotOrder) %>% left_join(snpSym) %>% left_join(symGeneraProp)
## Joining with `by = join_by(Sample)`
## Joining with `by = join_by(Sample)`
symGenera$depth = factor(symGenera$depth)
symGenera$depth = factor(symGenera$depth, levels = levels(symGenera$depth)[c(2, 1)])
symGenera$site = factor(symGenera$site)
symGenera$site = factor(symGenera$site, levels = levels(symGenera$site)[c(2, 3, 1, 4)])
#turn into melted dataframe with otustack() and remove "summ" rows
gssSym = otuStack(symGenera, count.columns = c(5:length(symGenera[1, ])),
condition.columns = c(1:4)) %>% filter(otu != "summ") %>% droplevels()
#check that levels are correct/ordered
levels(gssSym$otu)
## [1] "Symbiodinium" "Breviolum" "Cladocopium" "Durusdinium"
## [5] "itsSymbiodinium" "itsBreviolum" "itsCladocopium" "itsDurusdinium"
levels(gssSym$depth)
## [1] "Shallow" "Mesophotic"
levels(gssSym$site)
## [1] "Riley's Hump" "Tortugas Bank" "Lower Keys" "Upper Keys"
SNPs:
gssSymPlot = gssSym %>% left_join(zooxAnno, by = "site")
zooxSNPA = ggplot(data = subset(gssSymPlot, subset = otu %in% c("Symbiodinium", "Breviolum", "Cladocopium", "Durusdinium" )), aes(x = barPlotOrder, y = count, fill = otu, order = barPlotOrder)) +
geom_point(aes(x=1, y=0.5, fill = otu), shape = 22, size = 0) +
geom_bar(stat = "identity", position = "stack", colour = "grey25", width = 1, size = 0.2, show.legend = FALSE) +
xlab("Population") +
scale_color_manual(values = rev(flPal)) +
scale_fill_manual(values = colPalZoox, name = "Symbiodiniaceae genus") +
geom_segment(data = (subset(gssSymPlot, subset = otu %in% c("Symbiodinium", "Breviolum", "Cladocopium", "Durusdinium" )) %>% filter(depth == "Mesophotic")), aes(x = x1, xend = x2, y = y1, yend = y2, color = site), size = 7) +
coord_cartesian(ylim = c(0, 1), xlim = c(0.5, 30.5), clip = "off") +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
facet_grid(factor(depth) ~ site, drop = TRUE, scales = "free", switch = "both", space = "free") +
geom_text(data = subset(gssSymPlot, subset = otu == "Symbiodinium") %>% filter(Sample %in% c("SFK095", "SFK015")), x = 15.5, y = -.1, aes(label = site), size = 4.4, color = "#FFFFFF") +
geom_text(data = subset(gssSymPlot, subset = otu == "Symbiodinium") %>% filter(Sample %in% c("SFK195", "SFK156")), x = 15.5, y = -.1, aes(label = site), size = 4.4, color = "#000000") +
ggtitle("2bRAD") +
guides(fill = guide_legend(override.aes = list(size = 5), ncol = 1), color = "none") +
theme_bw()
zooxSNP = zooxSNPA + theme(plot.title = element_text(),
panel.grid = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA, color = "black", size = 0.75, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 12),
strip.text.y.left = element_text(size = 12, angle = 90),
strip.text.x.bottom = element_text(vjust = -.1, color = NA),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8, face = "italic"),
legend.key.size = unit(0.5, "line"),
legend.key = element_blank(),
legend.position = "bottom",
legend.direction = "vertical",
legend.box = "horizontal")
zooxSNP
ITS2:
zooxITSA = ggplot(data = subset(gssSymPlot, subset = !(otu %in% c("Symbiodinium", "Breviolum", "Cladocopium", "Durusdinium" ))), aes(x = barPlotOrder, y = count, fill = otu, order = barPlotOrder)) +
geom_point(aes(x=1, y=0.5, fill = otu), shape = 22, size = 0) +
geom_bar(stat = "identity", position = "stack", colour = "grey25", width = 1, size = 0.2, show.legend = FALSE) +
xlab("Population") +
scale_fill_manual(values = colPalZoox, name = "Symbiodiniaceae genus", labels = c("Symbiodinium", "Breviolum", "Cladocopium", "Durusdinium")) +
scale_color_manual(values = rev(flPal)) +
geom_segment(data = (subset(gssSymPlot, subset = !(otu %in% c("Symbiodinium", "Breviolum", "Cladocopium", "Durusdinium" )))%>% filter(depth == "Mesophotic")), aes(x = x1, xend = x2, y = y1, yend = y2, color = site), size = 7) +
coord_cartesian(ylim = c(0, 1), xlim = c(0.5, 30.5), clip = "off") +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
facet_grid(factor(depth) ~ site, drop = TRUE, scales = "free", switch = "both", space = "free") +
geom_text(data = subset(gssSymPlot, subset = otu == "itsSymbiodinium") %>% filter(Sample %in% c("SFK095", "SFK015")), x = 15.5, y = -.1, aes(label = site), size = 4.4, color = "#FFFFFF") +
geom_text(data = subset(gssSymPlot, subset = otu == "itsSymbiodinium") %>% filter(Sample %in% c("SFK195", "SFK156")), x = 15.5, y = -.1, aes(label = site), size = 4.4, color = "#000000") +
guides(fill = guide_legend(override.aes = list(size = 5), ncol = 1), color = "none") +
labs(title = expression(italic("ITS2"))) +
theme_bw()
zooxITS = zooxITSA + theme(plot.title = element_text(),
panel.grid = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA, color = "black", size = 0.75, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 12),
strip.text.y.left = element_text(size = 12, angle = 90),
strip.text.x.bottom = element_text(vjust = -.1, color = NA),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8, face = "italic"),
legend.key = element_blank(),
legend.key.size = unit(0.1, "line"),
legend.position = "bottom",
legend.direction = "vertical",
legend.box = "horizontal")
# zooxITS
its2Plots = (its2ProfsPlot + theme(legend.position = "right", legend.title = element_text(angle = 0)) & guides(color = "none", fill = guide_legend(ncol = 1, reverse = FALSE)))/zooxITS/zooxSNP +
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(size = 16),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "right",
legend.title = element_text(color = "black", size = 10),
legend.text = element_text(color = "black", size = 8))
ggsave("../figures/figure6.png", plot = its2Plots, height = 26, width = 20, units = "cm", dpi = 300)
ggsave("../figures/figure6.svg", plot = its2Plots, height = 26, width = 20, units = "cm", dpi = 300)
Comparing the two outputs with procrustes analysis
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("Sample" = tubeID, "site" = site, "depth" = depthZone)
symSnpDf = zooxMaps %>% left_join(popData) %>% relocate(c(site, depth), .after = Sample) %>% filter(!row_number()==131) %>% mutate(dataSet = "SNPs") %>% relocate(dataSet, .after = Sample)
## Joining with `by = join_by(Sample)`
rownames(symSnpDf) = symSnpDf$Sample
symITS2 = its2Profs
symITS2$Symbiodinium = rowSums(symITS2[6:11])
symITS2$Breviolum = rowSums(symITS2[12:14])
symITS2$Cladocopium = rowSums(symITS2[15:24])
symITS2$Durusdinium = 0
symITS2Df = symITS2 %>% dplyr::select(Sample, Symbiodinium, Breviolum, Cladocopium, Durusdinium) %>% left_join(popData) %>% relocate(c(site, depth), .after = Sample) %>% arrange(Sample) %>% mutate(dataSet = "ITS2") %>% relocate(dataSet, .after = Sample)
## Joining with `by = join_by(Sample)`
rownames(symITS2Df) = symITS2Df$Sample
#create distance matrices
symSnpdist = vegdist(decostand(symSnpDf[c(5:ncol(symSnpDf))], method = "normalize"), method = "bray")
symITS2dist = vegdist(decostand(symITS2Df[c(5:ncol(symITS2Df))], method = "normalize"), method = "bray")
snpPcOA = cmdscale(symSnpdist, eig = TRUE, x.ret = TRUE)
its2PcOA = cmdscale(symITS2dist, eig = TRUE, x.ret = TRUE)
#procrustes analysis
its2GeneraProcrustes = protest(Y = its2PcOA, X = snpPcOA, choices = c(1, 2),
permutations = 9999, symmetric = FALSE)
its2GeneraProcrustes
##
## Call:
## protest(X = snpPcOA, Y = its2PcOA, permutations = 9999, choices = c(1, 2), symmetric = FALSE)
##
## Procrustes Sum of Squares (m12 squared): 0.1582
## Correlation in a symmetric Procrustes rotation: 0.9175
## Significance: 0.0001
##
## Permutation: free
## Number of permutations: 9999
plot(its2GeneraProcrustes, kind = 1)
plot(its2GeneraProcrustes, kind = 2)
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("sample" = tubeID, "site" = site, "depth" = depthZone)
symGenProcPlot = procrustes(X = snpPcOA, Y = its2PcOA, choices = c(1, 2), symmetric = FALSE)
symGenProcPlotData = cbind(symGenProcPlot$X, symGenProcPlot$Yrot) %>% as.data.frame()
rownames(symGenProcPlotData) = rownames(symGenProcPlot$X)
colnames(symGenProcPlotData) = c("x1", "y1", "x2", "y2")
symGenProcPlotData$sample = row.names(symGenProcPlotData)
symGenProcPlotData$sample = gsub(pattern = "\\.2", "", symGenProcPlotData$sample)
symGenProcPlotData = symGenProcPlotData %>% left_join(popData) %>% relocate(sample, .before = x1)
## Joining with `by = join_by(sample)`
symGenProcPlotData$depth = factor(symGenProcPlotData$depth)
symGenProcPlotData$depth = factor(symGenProcPlotData$depth, levels(symGenProcPlotData$depth)[c(2,1)])
symGenProcPlotData$site = factor(symGenProcPlotData$site)
symGenProcPlotData$site = factor(symGenProcPlotData$site, levels(symGenProcPlotData$site)[c(4, 1, 3, 2)])
head(symGenProcPlotData)
## sample x1 y1 x2 y2 site depth
## 1 SFK001 -0.11319743 0.009463110 -0.1427982 -0.0009174297 Tortugas Bank Mesophotic
## 2 SFK002 -0.11052338 0.010627368 -0.1264448 0.0067320405 Tortugas Bank Mesophotic
## 3 SFK003 -0.12633478 0.001067354 -0.1427982 -0.0009174297 Tortugas Bank Mesophotic
## 4 SFK004 0.06580863 -0.060023821 0.1332136 -0.0332631646 Tortugas Bank Mesophotic
## 5 SFK005 -0.09426666 0.029200059 -0.1427982 -0.0009174297 Tortugas Bank Mesophotic
## 6 SFK006 0.55445199 -0.083081992 0.4276686 -0.0594228026 Tortugas Bank Mesophotic
#Calculate the angle of rotation, and then the slope of the rotated axis
theta = acos(symGenProcPlot$rotation[1,1])
slope = tan(theta)
#Calculate the y-intercepts for rotated axes
symGenProcInt = symGenProcPlot$translation[2] - (slope*symGenProcPlot$translation[1])
symGenProcInt2 = symGenProcPlot$translation[2] - (-1/slope*symGenProcPlot$translation[1])
sintSymGenProcPlotA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", linetype = 1) +
geom_vline(xintercept = 0, color = "gray90", linetype = 1) +
geom_abline(intercept = symGenProcInt, slope = slope, color = "gray75", linetype = 2) +
geom_abline(intercept = symGenProcInt2, slope = -(1/slope), color = "gray75", linetype = 2) +
geom_segment(data = symGenProcPlotData, aes(x = x2*(1-symGenProcPlot$scale), y = y2*(1-symGenProcPlot$scale), xend = x1*(1-symGenProcPlot$scale), yend = y1*(1-symGenProcPlot$scale), color = site), alpha = 0.5) +
geom_point(data = symGenProcPlotData, aes(x = x2*(1-symGenProcPlot$scale), y = y2*(1-symGenProcPlot$scale), fill = site, shape = depth), alpha = 0.5)+
geom_point(data = symGenProcPlotData, aes(x = x1*(1-symGenProcPlot$scale), y = y1*(1-symGenProcPlot$scale), fill = site, shape = depth), size = 2) +
annotate(geom = "label", x = 0.172, y = 0.172, label = " ", size = 10) +
annotate(geom = "text", x = 0.172, y = 0.1825, label = "Procrustes analysis:", size = 3) +
annotate(geom = "text", x = 0.172, y = 0.165, label = "italic(t[0]) == 0.9175 *','~italic(p) < 0.0001", parse = TRUE, size = 3) +
scale_color_manual(values = flPal) +
scale_fill_manual(values = flPal, name = "Site") +
scale_shape_manual(values = c(24, 25), name = "Depth zone") +
guides(color = "none", fill = guide_legend(override.aes = list(shape = 15, color = flPal, size = 3), ncol =2, order = 1), shape = guide_legend(ncol = 1)) +
theme_bw()
sintSymGenProcPlot = sintSymGenProcPlotA +
theme(panel.grid = element_blank(),
panel.border = element_rect(color = "black", size = 0.75, fill = NA),
axis.title = element_blank(),
axis.ticks = element_line(color = "black"),
axis.text = element_text(color = "black", size = 8),
legend.position = "bottom",
legend.direction = "vertical",
legend.box = "horizontal",
legend.key = element_blank(),
legend.background = element_blank(),
legend.title = element_text(color = "black", size = 8),
legend.text = element_text(color = "black", size = 8)
)
sintSymGenProcPlot
its2DistA = its2Profs %>% arrange(Sample)
its2Dist = its2DistA[c(6:ncol(its2Profs))] %>% decostand("normalize") %>% vegdist(method = "bray")
its2PCoA = cmdscale(its2Dist, eig = TRUE, x.ret = TRUE)
sintIBS = read.table("../data/snps/sintFiltSnps.ibsMat")[-131,-131] %>% as.matrix() %>% as.dist(diag = FALSE)
sintPCoA = cmdscale(sintIBS, eig = TRUE, x.ret = TRUE)
set.seed(981)
its2IBSProcrustes = protest(X = sintPCoA, Y = its2PCoA, permutations = 9999)
its2IBSProcrustes
##
## Call:
## protest(X = sintPCoA, Y = its2PCoA, permutations = 9999)
##
## Procrustes Sum of Squares (m12 squared): 0.9479
## Correlation in a symmetric Procrustes rotation: 0.2283
## Significance: 0.0001
##
## Permutation: free
## Number of permutations: 9999
plot(its2IBSProcrustes)
plot(its2IBSProcrustes, kind = 2)
admixpops = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select("sample" = tubeID, "pop" = site, "depth" = depthZone)
admixpops$popdepth = as.factor(paste(admixpops$pop, admixpops$depth, sep = " "))
clumpp4 = read.table("../data/snps/k/ClumppK4.output", header = FALSE)
clumpp4$V1 = admixpops$sample
sintAdmix = admixpops[-131,] %>% left_join(clumpp4[,c(1, 6:9)], by = c("sample" = "V1"))
admixDist = sintAdmix[c(5:ncol(sintAdmix))] %>% vegdist(method = "euclidean")
admixPCoA = cmdscale(admixDist, eig = TRUE, x.ret = TRUE)
set.seed(981)
its2AdmixProcrustes = protest(X = admixPCoA, Y = its2PCoA, permutations = 9999)
its2AdmixProcrustes
##
## Call:
## protest(X = admixPCoA, Y = its2PCoA, permutations = 9999)
##
## Procrustes Sum of Squares (m12 squared): 0.9359
## Correlation in a symmetric Procrustes rotation: 0.2531
## Significance: 0.0001
##
## Permutation: free
## Number of permutations: 9999
plot(its2AdmixProcrustes, kind = 1)
plot(its2AdmixProcrustes, kind = 2)
Using vegan::betadisper()
to look at multivariate
homogeneity of dispersion (PERMDISP) between sites and depths. This is
using Bray-Curtis dissimilarity.
alpha = with(its2Profs, tapply(specnumber(its2Profs[, c(6:ncol(its2Profs))]), site, mean))
alpha
## Riley's Hump Tortugas Bank Lower Keys Upper Keys
## 1.177778 1.290909 1.237288 1.200000
gamma = with(its2Profs, specnumber(its2Profs[, c(6:ncol(its2Profs))], site))
gamma
## Riley's Hump Tortugas Bank Lower Keys Upper Keys
## 13 11 11 9
gamma/alpha
## Riley's Hump Tortugas Bank Lower Keys Upper Keys
## 11.037736 8.521127 8.890411 7.500000
set.seed(694)
its2dispS = betadisper(vegdist(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize")), its2Profs$site)
set.seed(694)
permutest(its2dispS, permutations = 9999)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 9999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.0892 0.029731 0.7719 9999 0.516
## Residuals 215 8.2815 0.038518
No significant effect of Site on betadiversity.
alpha = with(its2Profs, tapply(specnumber(its2Profs[, c(6:ncol(its2Profs))]), depthZone, mean))
alpha
## Shallow Mesophotic
## 1.310924 1.130000
gamma = with(its2Profs, specnumber(its2Profs[, c(6:ncol(its2Profs))], depthZone))
gamma
## Shallow Mesophotic
## 18 11
gamma/alpha
## Shallow Mesophotic
## 13.730769 9.734513
set.seed(694)
its2dispD = betadisper(vegdist(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize")), its2Profs$depthZone)
its2dispD
##
## Homogeneity of multivariate dispersions
##
## Call: betadisper(d = vegdist(decostand(its2Profs[, c(6:ncol(its2Profs))],
## "normalize")), group = its2Profs$depthZone)
##
## No. of Positive Eigenvalues: 28
## No. of Negative Eigenvalues: 27
##
## Average distance to median:
## Shallow Mesophotic
## 0.6525 0.5770
##
## Eigenvalues for PCoA axes:
## (Showing 8 of 55 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8
## 26.118 18.773 13.961 9.281 6.898 6.102 3.901 3.190
set.seed(694)
its2dispDPerm = permutest(its2dispD, permutations = 9999)
its2dispDPerm
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 9999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.3104 0.310449 10.815 9999 0.0013 **
## Residuals 217 6.2290 0.028705
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Depth does significantly affect beta diversity.
Now let’s see how different communities are from each other with
PERMANOVA. We will utilize the vegan::adonis()
function. We
will use Bray-Curtis similarity for our distance matrix and run a total
0f 9,999 permutations, and test the effects of Site, Depth, and the
interaction between Site and Depth.
set.seed(694)
its2Adonis = adonis2(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize") ~ site * depthZone, data = its2Profs, permutations = 9999, method = "bray")
its2Adonis
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize") ~ site * depthZone, data = its2Profs, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## site 3 8.455 0.09229 8.4735 0.0001 ***
## depthZone 1 5.368 0.05860 16.1410 0.0001 ***
## site:depthZone 3 7.612 0.08309 7.6290 0.0001 ***
## Residual 211 70.178 0.76602
## Total 218 91.613 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(694)
its2PWAdonis = pairwise.adonis(decostand(its2Profs[,c(6:ncol(its2Profs))], "normalize"), factors = its2Profs$site, sim.method = "bray", p.adjust.m = "fdr", perm = 9999)
its2PWAdonis
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 Riley's Hump vs Tortugas Bank 1 3.0054485 7.606522 0.07202701 0.0001 0.00015
## 2 Riley's Hump vs Lower Keys 1 4.5770266 11.695628 0.10286788 0.0001 0.00015
## 3 Riley's Hump vs Upper Keys 1 3.4373577 9.258215 0.08247250 0.0001 0.00015
## 4 Tortugas Bank vs Lower Keys 1 0.9811441 2.446466 0.02137651 0.0334 0.03340
## 5 Tortugas Bank vs Upper Keys 1 1.6940679 4.427003 0.03770004 0.0014 0.00168
## 6 Lower Keys vs Upper Keys 1 3.4239083 9.014882 0.07153823 0.0001 0.00015
## sig
## 1 **
## 2 **
## 3 **
## 4 .
## 5 *
## 6 **
its2profsRxD = paste(its2Profs$site, its2Profs$depthZone, sep = " ")
set.seed(694)
its2PWAdonis2 = pairwise.adonis(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize"), factors = its2profsRxD, sim.method = "bray", p.adjust.m = "fdr", perm = 9999)
its2PWAdonis2
## pairs Df SumsOfSqs F.Model
## 1 Riley's Hump Shallow vs Riley's Hump Mesophotic 1 0.6958548 1.851042
## 2 Riley's Hump Shallow vs Tortugas Bank Shallow 1 2.8030339 7.429995
## 3 Riley's Hump Shallow vs Tortugas Bank Mesophotic 1 3.0884300 8.640125
## 4 Riley's Hump Shallow vs Lower Keys Shallow 1 5.0368955 15.488866
## 5 Riley's Hump Shallow vs Lower Keys Mesophotic 1 4.6194798 13.467087
## 6 Riley's Hump Shallow vs Upper Keys Shallow 1 4.7577989 13.584574
## 7 Riley's Hump Shallow vs Upper Keys Mesophotic 1 2.2928012 6.845341
## 8 Riley's Hump Mesophotic vs Tortugas Bank Shallow 1 1.7078494 4.482170
## 9 Riley's Hump Mesophotic vs Tortugas Bank Mesophotic 1 1.1310176 3.195846
## 10 Riley's Hump Mesophotic vs Lower Keys Shallow 1 3.2155336 10.357235
## 11 Riley's Hump Mesophotic vs Lower Keys Mesophotic 1 1.8698060 5.584033
## 12 Riley's Hump Mesophotic vs Upper Keys Shallow 1 2.4146387 7.007461
## 13 Riley's Hump Mesophotic vs Upper Keys Mesophotic 1 0.4347921 1.342139
## 14 Tortugas Bank Shallow vs Tortugas Bank Mesophotic 1 2.6960612 7.456037
## 15 Tortugas Bank Shallow vs Lower Keys Shallow 1 1.9880105 6.041735
## 16 Tortugas Bank Shallow vs Lower Keys Mesophotic 1 3.3741943 9.729362
## 17 Tortugas Bank Shallow vs Upper Keys Shallow 1 3.5069150 9.905960
## 18 Tortugas Bank Shallow vs Upper Keys Mesophotic 1 3.2100015 9.476618
## 19 Tortugas Bank Mesophotic vs Lower Keys Shallow 1 5.1052811 16.781413
## 20 Tortugas Bank Mesophotic vs Lower Keys Mesophotic 1 0.3865517 1.192600
## 21 Tortugas Bank Mesophotic vs Upper Keys Shallow 1 1.2421221 3.741093
## 22 Tortugas Bank Mesophotic vs Upper Keys Mesophotic 1 1.2155556 3.855401
## 23 Lower Keys Shallow vs Lower Keys Mesophotic 1 6.2867594 21.368532
## 24 Lower Keys Shallow vs Upper Keys Shallow 1 6.0655965 20.114834
## 25 Lower Keys Shallow vs Upper Keys Mesophotic 1 6.1026797 21.338933
## 26 Lower Keys Mesophotic vs Upper Keys Shallow 1 1.5730255 4.919063
## 27 Lower Keys Mesophotic vs Upper Keys Mesophotic 1 2.7859051 9.149432
## 28 Upper Keys Shallow vs Upper Keys Mesophotic 1 3.3019234 10.593110
## R2 p.value p.adjusted sig
## 1 0.04127087 0.1058 0.1139384615
## 2 0.11355640 0.0001 0.0002000000 **
## 3 0.14017047 0.0001 0.0002000000 **
## 4 0.21367234 0.0001 0.0002000000 **
## 5 0.18843760 0.0001 0.0002000000 **
## 6 0.18976958 0.0001 0.0002000000 **
## 7 0.10556412 0.0003 0.0004941176 **
## 8 0.09439691 0.0008 0.0011200000 *
## 9 0.07757691 0.0243 0.0272160000 .
## 10 0.19781860 0.0001 0.0002000000 **
## 11 0.11493555 0.0025 0.0033333333 *
## 12 0.14012831 0.0003 0.0004941176 **
## 13 0.03026780 0.2359 0.2446370370
## 14 0.12332989 0.0002 0.0003733333 **
## 15 0.09583706 0.0008 0.0011200000 *
## 16 0.14365058 0.0001 0.0002000000 **
## 17 0.14587762 0.0001 0.0002000000 **
## 18 0.14044299 0.0001 0.0002000000 **
## 19 0.24398180 0.0001 0.0002000000 **
## 20 0.02200670 0.2878 0.2878000000
## 21 0.06593269 0.0121 0.0147304348 .
## 22 0.06781063 0.0221 0.0257833333 .
## 23 0.27266725 0.0001 0.0002000000 **
## 24 0.26084260 0.0001 0.0002000000 **
## 25 0.27239244 0.0001 0.0002000000 **
## 26 0.07818081 0.0047 0.0059818182 *
## 27 0.13625479 0.0004 0.0006222222 **
## 28 0.15443402 0.0001 0.0002000000 **
its2PWAdonis2 %>% filter(p.adjusted > 0.05)
## pairs Df SumsOfSqs F.Model R2
## 1 Riley's Hump Shallow vs Riley's Hump Mesophotic 1 0.6958548 1.851042 0.04127087
## 2 Riley's Hump Mesophotic vs Upper Keys Mesophotic 1 0.4347921 1.342139 0.03026780
## 3 Tortugas Bank Mesophotic vs Lower Keys Mesophotic 1 0.3865517 1.192600 0.02200670
## p.value p.adjusted sig
## 1 0.1058 0.1139385
## 2 0.2359 0.2446370
## 3 0.2878 0.2878000
its2PWAdonis2Tab = its2PWAdonis2 %>% mutate(pairs = pairs, F.Model = round(F.Model, 3), R2 = round(R2,3), p.adjusted = round(p.adjusted, 4)) %>%dplyr::select(-p.value, -sig, -SumsOfSqs, -Df) %>%
flextable() %>%
flextable::compose(part = "header", j = "pairs", value = as_paragraph("Comparison")) %>%
flextable::compose(part = "header", j = "F.Model", value = as_paragraph(as_i("Pseudo-F"))) %>%
flextable::compose(part = "header", j = "R2", value = as_paragraph(as_i("R2"))) %>%
flextable::compose(part = "header", j = "p.adjusted", value = as_paragraph("p-value")) %>%
flextable::font(fontname = "Times New Roman", part = "all") %>%
flextable::fontsize(size = 10, part = "all") %>%
flextable::bold(part = "header") %>%
flextable::align(align = "left", part = "all") %>%
flextable::autofit()
table4 = read_docx()
table4 = body_add_flextable(table4, value = its2PWAdonis2Tab)
print(table4, target = "../tables/table4.docx")
options(sdmpredictors_datadir="../data/snps/bioOracle", timeout = max(300, getOption("timeout")))
flPal = paletteer_d("vapoRwave::jazzCup")[c(2:5)]
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% dplyr::select(Sample, site, depthZone, latDD, longDD, depthM)
its2DistA = its2Profs %>% arrange(Sample)
its2Dist = vegdist(decostand(its2DistA[, c(6:ncol(its2DistA))], "normalize"), method = "bray")
datasets = list_datasets(terrestrial = FALSE, marine = TRUE, freshwater = FALSE)
# Extract present-day data sets
present = list_layers(datasets) %>% dplyr::select(dataset_code, layer_code, name, units, description, contains("cellsize"), version) %>% filter(version == 22) %>% filter(layer_code %in% c("BO22_damean", "BO22_parmean", "BO22_ph", "BO22_curvelmax_bdmean", "BO22_salinitymean_bdmean", "BO22_salinitymean_ss", "BO22_curvelmean_ss", "BO22_curvelmean_bdmean", "BO22_dissoxmean_bdmean", "BO22_lightbotmax_bdmean", "BO22_lightbotmean_bdmean", "BO22_nitratemean_bdmean", "BO22_tempmax_bdmean", "BO22_tempmean_bdmean", "BO22_ppmean_ss", "BO22_ppmean_bdmean", "BO22_chlomean_ss", "BO22_chlomean_bdmean"))
envVar = load_layers(present$layer_code)
itsEnvData = data.frame(popData, raster::extract(envVar, popData[,5:4]))[-131,]
corData = rcorr(as.matrix(itsEnvData[,c(6:ncol(itsEnvData))]), type = "pearson")
corDataFlat = melt(corData$r, value.name = "r")
pDataFlat = melt(corData$P, value.name = "p")
corDataBind = corDataFlat %>% left_join(pDataFlat, by = c("Var1","Var2"))
ggplot(corDataBind) +
geom_tile(aes(x = Var1, y = Var2, fill = r)) +
scale_fill_gradient2(low = "#3B9AB2FF", high = "#F21A00FF") +
geom_text(data = filter(corDataBind, r >= 0.7, p < 0.05),aes(x = Var1, y = Var2, label = round(r, 2))) +
geom_text(data = filter(corDataBind, r <= -0.7, p < 0.05),aes(x = Var1, y = Var2, label = round(r, 2))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
itsEnvData2 = itsEnvData %>% dplyr::select("BO22_curvelmean_ss", "depthM", "BO22_lightbotmean_bdmean", "BO22_tempmean_bdmean", "BO22_ppmean_bdmean")
corData2 = cor(itsEnvData2)
corMelt2 = melt(corData2)
ggplot(corMelt2) +
geom_tile(aes(x = Var1, y = Var2, fill = value)) +
scale_fill_gradient2(low = "#3B9AB2FF", high = "#F21A00FF") +
geom_text(data = corMelt2[corMelt2$value >= 0.7,],aes(x = Var1, y = Var2, label = round(value, 2))) +
geom_text(data = corMelt2[corMelt2$value <= -0.7,],aes(x = Var1, y = Var2, label = round(value, 2))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
vif = diag(solve(cor(itsEnvData2)))
vif
## BO22_curvelmean_ss depthM BO22_lightbotmean_bdmean
## 2.056388 1.664077 3.706417
## BO22_tempmean_bdmean BO22_ppmean_bdmean
## 3.200383 3.437381
itsDbrda0 = dbrda(its2Dist ~ 1, data = itsEnvData2)
itsDbrda1 = dbrda(its2Dist ~ ., data = itsEnvData2)
anova(itsDbrda1)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = its2Dist ~ BO22_curvelmean_ss + depthM + BO22_lightbotmean_bdmean + BO22_tempmean_bdmean + BO22_ppmean_bdmean, data = itsEnvData2)
## Df SumOfSqs F Pr(>F)
## Model 5 13.778 7.541 0.001 ***
## Residual 213 77.835
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RsquareAdj(itsDbrda1)
## $r.squared
## [1] 0.1503963
##
## $adj.r.squared
## [1] 0.1304526
set.seed(092)
bestItsDbrda <- ordiR2step(itsDbrda0, itsDbrda1)
## Step: R2.adj= 0
## Call: its2Dist ~ 1
##
## R2.adjusted
## <All variables> 0.13045258
## + depthM 0.07556852
## + BO22_curvelmean_ss 0.03225721
## + BO22_tempmean_bdmean 0.02022712
## + BO22_ppmean_bdmean 0.01667414
## + BO22_lightbotmean_bdmean 0.01473131
## <none> 0.00000000
##
## Df AIC F Pr(>F)
## + depthM 1 974.13 18.821 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: R2.adj= 0.07556852
## Call: its2Dist ~ depthM
##
## R2.adjusted
## <All variables> 0.13045258
## + BO22_curvelmean_ss 0.10492630
## + BO22_lightbotmean_bdmean 0.09605059
## + BO22_ppmean_bdmean 0.09267258
## + BO22_tempmean_bdmean 0.09002396
## <none> 0.07556852
##
## Df AIC F Pr(>F)
## + BO22_curvelmean_ss 1 968.05 8.1174 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: R2.adj= 0.1049263
## Call: its2Dist ~ depthM + BO22_curvelmean_ss
##
## R2.adjusted
## <All variables> 0.1304526
## + BO22_lightbotmean_bdmean 0.1255916
## + BO22_tempmean_bdmean 0.1210367
## + BO22_ppmean_bdmean 0.1189578
## <none> 0.1049263
##
## Df AIC F Pr(>F)
## + BO22_lightbotmean_bdmean 1 963.92 6.1048 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: R2.adj= 0.1255916
## Call: its2Dist ~ depthM + BO22_curvelmean_ss + BO22_lightbotmean_bdmean
##
## R2.adjusted
## + BO22_tempmean_bdmean 0.1310355
## <All variables> 0.1304526
## + BO22_ppmean_bdmean 0.1272419
## <none> 0.1255916
bestItsDbrda$anova
## R2.adj Df AIC F Pr(>F)
## + depthM 0.075569 1 974.13 18.8206 0.002 **
## + BO22_curvelmean_ss 0.104926 1 968.05 8.1174 0.002 **
## + BO22_lightbotmean_bdmean 0.125592 1 963.92 6.1048 0.002 **
## <All variables> 0.130453
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Now create best model and look at variance partitioning and significance with ANOVA
its2Model = itsEnvData2 %>% dplyr::select("Depth" = depthM, "Current" = BO22_curvelmean_ss, "Light" = BO22_lightbotmean_bdmean)
its2Dbrda = dbrda(its2Dist ~ Depth + Current + Light, its2Model)
itsDbrdaVarPart = varpart(its2Dist, ~ depthM, ~ BO22_curvelmean_ss, ~ BO22_lightbotmean_bdmean, data = itsEnvData2)
itsDbrdaDepth = dbrda(its2Dist ~ Depth, its2Model)
itsDbrdaCur = dbrda(its2Dist ~ Current, its2Model)
itsDbrdaLight = dbrda(its2Dist ~ Light, its2Model)
plot(itsDbrdaVarPart, Xnames = c("Depth", "Current", "Light"), bg = c(4:7), digits = 2)
itsDbrdaVarPart
##
## Partition of squared Bray distance in dbRDA
##
## Call: varpart(Y = its2Dist, X = ~depthM, ~BO22_curvelmean_ss,
## ~BO22_lightbotmean_bdmean, data = itsEnvData2)
##
## Explanatory tables:
## X1: ~depthM
## X2: ~BO22_curvelmean_ss
## X3: ~BO22_lightbotmean_bdmean
##
## No. of explanatory tables: 3
## Total variation (SS): 91.613
## No. of observations: 219
##
## Partition table:
## Df R.square Adj.R.square Testable
## [a+d+f+g] = X1 1 0.07981 0.07557 TRUE
## [b+d+e+g] = X2 1 0.03670 0.03226 TRUE
## [c+e+f+g] = X3 1 0.01925 0.01473 TRUE
## [a+b+d+e+f+g] = X1+X2 2 0.11314 0.10493 TRUE
## [a+c+d+e+f+g] = X1+X3 2 0.10434 0.09605 TRUE
## [b+c+d+e+f+g] = X2+X3 2 0.05629 0.04755 TRUE
## [a+b+c+d+e+f+g] = All 3 0.13762 0.12559 TRUE
## Individual fractions
## [a] = X1 | X2+X3 1 0.07804 TRUE
## [b] = X2 | X1+X3 1 0.02954 TRUE
## [c] = X3 | X1+X2 1 0.02067 TRUE
## [d] 0 0.00328 FALSE
## [e] 0 -0.00018 FALSE
## [f] 0 -0.00537 FALSE
## [g] 0 -0.00038 FALSE
## [h] = Residuals 0.87441 FALSE
## Controlling 1 table X
## [a+d] = X1 | X3 1 0.08132 TRUE
## [a+f] = X1 | X2 1 0.07267 TRUE
## [b+d] = X2 | X3 1 0.03282 TRUE
## [b+e] = X2 | X1 1 0.02936 TRUE
## [c+e] = X3 | X1 1 0.02048 TRUE
## [c+f] = X3 | X2 1 0.01530 TRUE
## ---
## Use function 'dbrda' to test significance of fractions of interest
set.seed(004)
anova(its2Dbrda)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = its2Dist ~ Depth + Current + Light, data = its2Model)
## Df SumOfSqs F Pr(>F)
## Model 3 12.608 11.437 0.001 ***
## Residual 215 79.005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(003)
anova(itsDbrdaDepth)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = its2Dist ~ Depth, data = its2Model)
## Df SumOfSqs F Pr(>F)
## Model 1 7.312 18.821 0.001 ***
## Residual 217 84.302
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(002)
anova(itsDbrdaCur)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = its2Dist ~ Current, data = its2Model)
## Df SumOfSqs F Pr(>F)
## Model 1 3.362 8.2665 0.001 ***
## Residual 217 88.251
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(001)
anova(itsDbrdaLight)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = its2Dist ~ Light, data = its2Model)
## Df SumOfSqs F Pr(>F)
## Model 1 1.764 4.2594 0.001 ***
## Residual 217 89.850
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Prepare data and plot dbRDA
itsRdaVar = round(its2Dbrda$CA$eig/sum(its2Dbrda$CA$eig)*100, 1)
head(itsRdaVar)
## MDS1 MDS2 MDS3 MDS4 MDS5 MDS6
## 31.0 19.1 14.4 9.4 8.3 6.5
itsRdaVarFit = round(its2Dbrda$CCA$eig/sum(its2Dbrda$CCA$eig)*100, 1)
head(itsRdaVarFit)
## dbRDA1 dbRDA2 dbRDA3
## 64.0 22.6 13.3
sintI2P = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,133,209,211),] %>% dplyr::select("sample" = tubeID, "pop" = site, "depth" = depthZone)
sintI2P$popdepth = paste(sintI2P$pop, sintI2P$depth)
its2RdaPoints = as.data.frame(scores(its2Dbrda, choices=c(1:4))$sites)
its2RdaPoints$sample = sintI2P$sample
head(its2RdaPoints)
## dbRDA1 dbRDA2 dbRDA3 MDS1 sample
## 1 -1.31925473 2.7092410 0.2738527 1.0905443 SFK001
## 2 -0.08489053 -0.7691498 0.9311652 0.4621761 SFK002
## 3 -0.09044651 -0.7642714 0.9875404 0.4700716 SFK003
## 4 -1.20625229 2.4780485 0.1118824 1.0099685 SFK004
## 5 -0.82781114 -2.7141200 0.5364448 -1.2136796 SFK005
## 6 -0.37603784 1.4691034 -1.2528053 0.6414877 SFK006
its2DbrdaData1 = sintI2P %>% left_join(its2RdaPoints)
## Joining with `by = join_by(sample)`
head(its2DbrdaData1)
## sample pop depth popdepth dbRDA1 dbRDA2
## 1 SFK001 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -1.31925473 2.7092410
## 2 SFK002 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.08489053 -0.7691498
## 3 SFK003 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.09044651 -0.7642714
## 4 SFK004 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -1.20625229 2.4780485
## 5 SFK005 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.82781114 -2.7141200
## 6 SFK006 Tortugas Bank Mesophotic Tortugas Bank Mesophotic -0.37603784 1.4691034
## dbRDA3 MDS1
## 1 0.2738527 1.0905443
## 2 0.9311652 0.4621761
## 3 0.9875404 0.4700716
## 4 0.1118824 1.0099685
## 5 0.5364448 -1.2136796
## 6 -1.2528053 0.6414877
tail(its2DbrdaData1)
## sample pop depth popdepth dbRDA1 dbRDA2
## 214 SFK215 Upper Keys Mesophotic Upper Keys Mesophotic -1.31925473 2.7092410
## 215 SFK216 Upper Keys Mesophotic Upper Keys Mesophotic -1.31925473 2.7092410
## 216 SFK217 Upper Keys Shallow Upper Keys Shallow -0.82781114 -2.7141200
## 217 SFK218 Upper Keys Shallow Upper Keys Shallow 0.58660951 -0.7187489
## 218 SFK219 Upper Keys Shallow Upper Keys Shallow -0.82781114 -2.7141200
## 219 SFK220 Upper Keys Shallow Upper Keys Shallow -0.09044651 -0.7642714
## dbRDA3 MDS1
## 214 0.2738527 1.3028402
## 215 0.2738527 1.3028402
## 216 0.5364448 -1.1907650
## 217 -5.2578299 0.2963753
## 218 0.5364448 -1.1947380
## 219 0.9875404 0.4519316
its2EnvLoad = as.data.frame(its2Dbrda$CCA$biplot)
its2EnvLoad$var = row.names(its2EnvLoad)
its2EnvLoad$var = its2EnvLoad$var %>% gsub(pattern = "`", replacement = "", x = its2EnvLoad$var)
its2DbrdaData = merge(its2DbrdaData1, aggregate(cbind(mean.x = dbRDA1, mean.y2 = dbRDA2, mean.y3 = dbRDA3)~popdepth, its2DbrdaData1, mean), by="popdepth") %>% left_join(gssProf %>% dplyr::select(Sample, otu, count) %>% group_by(Sample) %>% summarise(count = max(count)) %>% left_join(gssProf %>% dplyr::select(Sample, otu, count)) %>% rename(sample = Sample, sym = otu) %>% dplyr::select(-count))
## Joining with `by = join_by(Sample, count)`
## Joining with `by = join_by(sample)`
its2DbrdaData$depth = factor(its2DbrdaData$depth)
its2DbrdaData$depth = factor(its2DbrdaData$depth, levels(its2DbrdaData$depth)[c(2,1)])
its2DbrdaData$pop = factor(its2DbrdaData$pop)
its2DbrdaData$pop = factor(its2DbrdaData$pop, levels(its2DbrdaData$pop)[c(4, 1, 3, 2)])
head(its2DbrdaData)
## popdepth sample pop depth dbRDA1 dbRDA2 dbRDA3
## 1 Lower Keys Mesophotic SFK101 Lower Keys Mesophotic -0.09679145 -0.7884120 0.8073779
## 2 Lower Keys Mesophotic SFK102 Lower Keys Mesophotic -0.82781114 -2.7141200 0.5364448
## 3 Lower Keys Mesophotic SFK103 Lower Keys Mesophotic -1.31925473 2.7092410 0.2738527
## 4 Lower Keys Mesophotic SFK104 Lower Keys Mesophotic -0.09044651 -0.7642714 0.9875404
## 5 Lower Keys Mesophotic SFK105 Lower Keys Mesophotic -0.83432981 -2.7412585 0.4317682
## 6 Lower Keys Mesophotic SFK106 Lower Keys Mesophotic -0.82781114 -2.7141200 0.5364448
## MDS1 mean.x mean.y2 mean.y3 sym
## 1 0.5933409 -0.3995191 -1.51736 0.4421128 C3-C1-C3.10
## 2 -1.0883171 -0.3995191 -1.51736 0.4421128 C3/C3.10
## 3 1.1986904 -0.3995191 -1.51736 0.4421128 C1/C3-C42.2-C1dl-C3gl-C3gm-C3gk
## 4 0.5702716 -0.3995191 -1.51736 0.4421128 C3-C1-C3.10
## 5 -1.1053055 -0.3995191 -1.51736 0.4421128 C3/C3.10
## 6 -1.1201014 -0.3995191 -1.51736 0.4421128 C3/C3.10
its2DbrdaPlotA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_segment(data = its2EnvLoad, aes(x = 0, y = 0, xend = dbRDA1, yend = dbRDA2), color = "#F5065B", arrow = arrow(length = unit(0.15, "cm"), type = "open"), size = 0.65) +
geom_point(data = its2DbrdaData, aes(x = dbRDA1, y = dbRDA2, fill = sym, shape = depth), color = "black", size = 2, alpha = 1, position = position_jitter(seed = 1, width = 0.075, height = 0.075)) +
scale_shape_manual(values = c(24, 25), name = "Depth Zone") +
geom_text(data = its2EnvLoad[1,], aes(x = dbRDA1-0.2, y = dbRDA2, label = var), color = "#F5065B", size = 3, fontface = "bold") +
geom_text(data = its2EnvLoad[2,], aes(x = dbRDA1, y = dbRDA2+0.2, label = var), color = "#F5065B", size = 3, fontface = "bold") +
geom_text(data = its2EnvLoad[3,], aes(x = dbRDA1-0.15, y = dbRDA2-0.1, label = var), color = "#F5065B", size = 3, fontface = "bold") +
scale_fill_manual(values = profPal[-c(3,6,7,8,18)], name = expression(paste(italic("ITS2"), " type profile"))) +
labs(title = "Symbiodiniaceae", x = paste0("dbRDA 1 (", itsRdaVarFit[1], "% [",itsRdaVar[1], "%])"), y = paste0("dbRDA 2 (", itsRdaVarFit[2], "% [",itsRdaVar[2], "%])")) +
guides(shape = guide_legend(override.aes = list(size = 3, stroke = 0.5, alpha = NA), order = 2), fill = guide_legend(override.aes = list(shape = 22, size = 4), order = 1))+
theme_bw()
its2DbrdaPlot = its2DbrdaPlotA +
theme(plot.title = element_text(color = "black", size = 12),
axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.spacing = unit(-5, "pt"),
legend.key.size = unit(5, "pt"),
legend.position = "right",
legend.title = element_text(size = 10),
legend.text = element_text(size = 8),
panel.border = element_rect(color = "black", size = 1),
panel.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
its2DbrdaPlot
dbRdaPlots = (sintDbrdaPlot / its2DbrdaPlot) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 16))
ggsave("../figures/figure4.png", plot = dbRdaPlots, height = 18, width = 18, units = "cm", dpi = 300)
ggsave("../figures/figure4.svg", plot = dbRdaPlots, height = 18, width = 18, units = "cm", dpi = 300)
save.image("fknmsSint.RData")