## Written by Josefin Stiller, 2023 library(tidyverse) library(magrittr) library(fuzzyjoin) ## Set working directory to where this R script sits setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) t <- read_tsv("master_table_gene_trees.txt") t %>% filter(chromosome=="chr4") %>% filter(start >= 25030000 & end <= 32670000 | start >= 33510000 & end <= 34470000 | start >= 44130000 & end <= 56810000) %>% filter(part_of_63K=="Y") %>% summarize(n = n()) t %>% filter(chromosome=="chr4") %>% summarize(n = n()) ## Some cleaning of the data t %<>% filter(!is.na(chromosome)) # Keep only 94K intergenic regions (not exonic, intronic or UCE gene trees) t %<>% filter(!is.na(chr_type)) # Keep only gene trees that can be assigned to a chromosome (removes a few on linkage groups) t %<>% filter(!is.na(start)) # Keep only gene trees on chromsomes with clear names, remove loci on e.g. chr1_AADN03009258 ## Some transformations to convert start coordinate to Mb t$chr_type <- factor(t$chr_type, levels = c("macrochromosome", "intermediate_chromosome", "microchromosome", "sex_chromosome")) t$start_Mb <- t$start/1000000 ## Just for overview, calculate the average normalized RF by chromosome, use the values calculated on the gene trees where low support branches were collapsed. Those are also the gene trees that were used in the species tree reconstruction. t %>% group_by(chr_type) %>% summarize(mean=mean(norm_RF_collapsed)) ## Mean collapsed RF t %>% summarize(mean=mean(norm_RF_collapsed)) ## Add data for the recombination rate for the third plot. From Elferink et al. (2010) BMC Genetics 11, 11. ## Data contains recombination rate over bins of 500 kb of the chicken genome, which we adopt here recomb <- read_tsv("12863_2009_758_MOESM5_ESM.tsv") ## Perform some cosmetics on chromosome labels and add chromosome types, so they can be merged with our data recomb$chromosome <- gsub("GGA", "", recomb$Chromosome) recomb$chromosome <- gsub("GG", "", recomb$chromosome) recomb$chromosome <- paste0("chr", recomb$chromosome) ## Join the master dataset and the recombination rates according to the binned coordinates of the recombination dataset. genome_join(t, recomb, by = c("chromosome" = "chromosome", "start" = "location_From", "end" = "location_to")) -> t ## To correctly sort chromosomes numerically t$chr_order = factor(t$chromosome.x, levels=c(paste("chr", seq(1:28), sep=""), "chrZ")) ## Prepare table to have 3 panels for plotting of normalized RF distance, GC standard deviation and recombination rate along bins of 500 kb of each chromosome t %<>% ## Rename panel variables for plotting, add a line break to fit better mutate(`Recomb.\nrate` = Combined_male_cM) %>% group_by(chr_type, chr_order, chromosome.x, location_From, `Recomb.\nrate`) %>% ## Calculate the mean normalized RF and the standard deviation of GC content summarize(`Norm.\nRF distance` = mean(norm_RF_collapsed)*100, `GC\nstdev` = mean(GC_sd)) %>% gather(metric, value, `Recomb.\nrate`:`GC\nstdev`) ## Reorder the metrics according to the plot order t$metric <- factor(t$metric, levels=unique(t$metric)[c(2,3,1)]) ## Add the mean value for the the panels t %<>% group_by(metric) %>% mutate(mean = mean(value)) ## Write source data, only comparisons and variables that are shown in plot and renamed to be more meaningful t %>% rename(chromosome_type = chr_type, plot_order = chr_order, chromosome = chromosome.x, coordinate = location_From, genome_wide_mean = mean) %>% write_csv(., "Fig4c_source_data.csv") ## Plot Fig. 4c colors <- c("#5FBFA2", "#AACC8E", "#0091AD", "#4D4D4D") t %>% ggplot(aes(x=location_From, y=value, color=chr_type)) + geom_line(lwd=0.2) + facet_grid(metric~chr_order, scales="free", space="free_x", switch="x") + theme_classic() + geom_hline(aes(yintercept = mean, group=metric), color="grey30", linetype=2) + scale_color_manual(values=colors) + theme(legend.position="none", axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.line.x=element_blank(), panel.spacing.x=unit(0, "lines"), panel.spacing.y=unit(0.2, "lines"), strip.placement = "inside", strip.background.y = element_blank(), strip.background.x = element_blank(), plot.margin=unit(c(0,0,0,0), "cm")) + xlab("") + ylab("") + labs(color="Chromosome type") -> C ## Save plot to PDF ggsave("Fig4c.pdf", C) ## Save RData, used later in assembling the compound figure saveRDS(C, file="Fig4c.Rdata")