Context: Changes in the epigenetic landscape and the gene expression profile of rod photoreceptors during aging.

Datasets:
* ATAC-seq: to assess chromatin accessibility
* RNA-seq: to asses gene expression

Samples:
* 4 groups of age: 3M, 12M, 18M, 24M (mouse)
* 4 replicates per group

Differential analysis over time
* Differential chromatin accessibility: featureCount (quantify peaks) + limma (differential analysis) (my work) -> 3378 DARs
* Differential gene expression: limma (colleague’s work) -> 973 DEGs
Visualization of the DARs: (heatmap with pheatmap)
Caption for the picture. Caption for the picture.

Annotation of the DARs
* Serial annotation:
- Promoter overlap (-1000 +50 bp around TSS)
- Gene body overlap
- Nearest upstream TSS and nearest downstream TSS
* Bedtools (not an R package) and R to prepare the input files, then assemble, refine and filter the final tables

Integration DAR-DEG
* Venn diagram: Venny (online)
Caption for the picture. * Correlation between changes in chromatin accessibility and changes in gene expression at 24M?

setwd("/Volumes/NnrlChromatin/Cathy_Jaeger/CJ1b_ATAC-Aging_2018.02.08/DownstreamOf2018.10.01/Comparison_ATAC.RNA/2018.12.10/")
library(tidyverse)

Read 24M-DAR annotation file

DAR.annot.24 <- read.delim("/Volumes/NnrlChromatin/Cathy_Jaeger/CJ1b_ATAC-Aging_2018.02.08/DownstreamOf2018.10.01/Peak_annotation/AnnotationByDistance_2018.12.05/DARs.annot.24M.tsv", header = TRUE) 

Retrieve differentially expressed genes

DEG <- read.delim("/Volumes/NnrlChromatin/Cathy_Jaeger/CJ1c_RNA-Aging_2018.06/RNA-seq_aging/180912_Analysis/R_output/DE_sig-MSTR_5CPM15FC05FDRV2.tsv", header = TRUE)
colnames(DEG)[1] <- "GeneID" # Rename first column
colnames(DEG)[12] <- "log2FC.DEG.12"
colnames(DEG)[13] <- "log2FC.DEG.18"
colnames(DEG)[14] <- "log2FC.DEG.24"

Join DAR and DEG info and retain only DEG that contain a DAR in their promoter or gene body

DAR.annot.24.prom.genebody.DEG <- DAR.annot.24[DAR.annot.24$Category!="Intergenic",] # Keep DARs that overlap a promoter or gene body
DAR.annot.24.prom.genebody.DEG <- DAR.annot.24.prom.genebody.DEG[which(DAR.annot.24.prom.genebody.DEG$GeneID %in% DEG$GeneID),] # Keep genes that are in the DEG list
DAR.annot.24.prom.genebody.DEG <- left_join(DAR.annot.24.prom.genebody.DEG, DEG, by = "GeneID") # Add info of DEG table
genelist <- unique(as.character(DAR.annot.24.prom.genebody.DEG$GeneID)) # Retrieve list of unique gene IDs

Plot log2FC DEG ~ log2FC DAR

DAR.annot.24.prom.genebody.DEG %>% ggplot(aes(log2FC.DEG.24, log2FC.DAR)) + geom_point(alpha = 0.2) + theme_classic() + geom_smooth(method='lm') + facet_grid(.~ Category)

Test correlation log2FC DEG ~ log2FC DAR

cor.test(DAR.annot.24.prom.genebody.DEG$log2FC.DEG.24, DAR.annot.24.prom.genebody.DEG$log2FC.DAR, method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  DAR.annot.24.prom.genebody.DEG$log2FC.DEG.24 and DAR.annot.24.prom.genebody.DEG$log2FC.DAR
## t = 36.46, df = 721, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7779661 0.8294231
## sample estimates:
##       cor 
## 0.8052051

Perform GO enrichment analysis

library(clusterProfiler)
library(org.Mm.eg.db)
enrichGO.CC <- enrichGO(genelist, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "CC", readable = T)  # No significant enrichment
enrichGO.MF <- enrichGO(genelist, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "MF", readable = T)
enrichGO.BP <- enrichGO(genelist, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = T)  # No significant enrichment

Plot GO enrichment results

dotplot(enrichGO.MF)