## Written by Josefin Stiller 2023 library(tidyverse) library(magrittr) library(plotrix) ## Set working directory to where this R script sits setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) ## Read the master table with metrics for all gene trees t <- read.table("master_table_gene_trees.txt", header=T, sep="\t") ## Keep only intergenic regions and those that are assigned to chromosome types and those that are part of the 80k locus set (excluding exons) t %>% filter(datatype=="random_region" & !is.na(chr_type) & part_of_80K == "Y") -> t ## For each chromosome, calculate the mean % GC standard deviation and SE t %>% group_by(chromosome, chr_type) %>% summarize(mean=mean(GC_sd), se=std.error(GC_sd)) -> t2 ## Some reformatting of chromosome names for plotting t2$chr_num <- factor(gsub("chr", "", t2$chromosome), levels=c(1:28, "Z")) t2$chr_num[which(is.na(t2$chr_num))] <- "Z" ## Set up chromosome type colors colors <- c("#AACC8E","#5FBFA2","#0091AD", "#4D4D4D") ## Plot EDF10b t2 %>% ggplot(aes(x=chr_num, y=mean)) + geom_pointrange(aes(ymin=mean-se, ymax=mean+se, color=chr_type), size=0.3) + theme_classic(base_size = 6) + scale_color_manual(values=colors) + theme(legend.position = "none") + xlab("Chromosome") + geom_hline(yintercept = mean(t$GC_sd), linetype=2) + scale_y_continuous(name="% GC standard deviation", labels = percent, ## Add a bit more space at bottom for n gene trees limits=c(0.01,0.038)) + scale_fill_manual(values=colors) ## Save plot to PDF ggsave("EDF10b.pdf", width=9, height=6.1, units="cm") ## Sample size t %>% group_by(chromosome) %>% count() t %>% group_by(chr_type) %>% count()