Use the “Import Dataset” button within RStudio to download an Excel file from the following URL: http://mbio.asm.org/lookup/suppl/doi:10.1128/mBio.01122-15/-/DCSupplemental/mbo004152416st1.xlsx
You will have to hit the “Update” button to see a preview of the data before downloading. Adjust the value of the “skip” option so that the column names will be correct. If you like, change the name of the variable that the data will be stored in.
The following code is a result of the ‘Import Dataset’ and ‘Update’ button activity from above
library(readxl)
url <- "http://mbio.asm.org/lookup/suppl/doi:10.1128/mBio.01122-15/-/DCSupplemental/mbo004152416st1.xlsx"
destfile <- "mbo004152416st1.xlsx"
curl::curl_download(url, destfile)
dat1 <- read_excel(destfile, skip = 1)
library(readxl)
url <- "http://mbio.asm.org/lookup/suppl/doi:10.1128/mBio.01122-15/-/DCSupplemental/mbo004152416st2.xlsx"
destfile <- "mbo004152416st2.xlsx"
curl::curl_download(url, destfile)
dat2 <- read_excel(destfile, skip = 1)
url <- "http://mbio.asm.org/lookup/suppl/doi:10.1128/mBio.01122-15/-/DCSupplemental/mbo004152416st3.xlsx"
destfile <- "dat3.xlsx"
curl::curl_download(url, destfile)
dat3 <- read_excel(destfile, skip = 1)
names(dat1)[1] <- names(dat2)[1] # Need this to have valid rbind of data.frames
full_dat <- rbind(dat1,dat2,dat3)
names(full_dat) <- make.names(names(full_dat)) # gets rid of spaces in names
names(full_dat)[7] <- 'Normalized.Cell.Count' # Less crazy
reshape2
package, melt the new single data set so that measured experimental attributes are in a single column.library(reshape2)
melted_data <- melt(full_dat, id.vars = 1:5)
ggplot
to plot density curves for the two assay readouts (percent positive virus and cell count) simultaneuously.library(ggplot2)
ggplot(melted_data, aes(value))+
geom_density()+
facet_wrap(~variable, nrow=2, scales='free')
aggregate
or dcast
to find the median value of both readouts for each gene (Gene Symbol) and put this aggregation into a new table. Save that table as a csv file and try opening it in Excel. Did you get the results you expected? Try modifying the arguments to write.csv so you get what you would want if you were giving this to a collaborator.output <- dcast(melted_data, `Gene.Symbol`~variable, median)
# output <- aggregate(cbind(Percent.Positive.Virus, Normalized.Cell.Count)~Gene.Symbol, full_dat, median)
write.csv(output, file='test.csv', row.names = F)
# Method 1
o <- rev(order(output$Percent.Positive.Virus))
top_10_genes <- output$Gene.Symbol[o[1:10]]
# Method 2
top_10_genes <- dplyr::arrange(output, desc(Percent.Positive.Virus))[1:10,'Gene.Symbol']
# Method 3
top_10_genes <- dplyr::top_n(output, 10, Percent.Positive.Virus)$Gene.Symbol
%in%
operator).new_table <- dplyr::filter(full_dat, Gene.Symbol %in% top_10_genes)
ggplot(new_table, aes(Percent.Positive.Virus, Normalized.Cell.Count))+
geom_point() +
scale_x_log10()
## Warning: Transformation introduced infinite values in continuous x-axis
aes(label = )
and geom_text
)ggplot(new_table, aes(Percent.Positive.Virus, Normalized.Cell.Count, label = Gene.Symbol))+
geom_point()+geom_text(hjust=0.5,vjust=0)+
scale_x_log10()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
ggrepel
. Read its documentation and use it to make it so your labels in step 11 aren’t so ovelapping and messy.library(ggrepel)
ggplot(new_table, aes(Percent.Positive.Virus, Normalized.Cell.Count, label = Gene.Symbol))+
geom_point()+geom_text_repel()+
scale_x_log10()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
dev.off()
. Now open the pdf file on your computer to see that the two graphs are on seperate pages.plt1 <- ggplot(new_table, aes(Percent.Positive.Virus, Normalized.Cell.Count,
label = Gene.Symbol))+
geom_point()+geom_text(hjust=0.5,vjust=0)+
scale_x_log10()
plt2 <- ggplot(new_table, aes(Percent.Positive.Virus, Normalized.Cell.Count, label = Gene.Symbol))+
geom_point()+geom_text_repel()+
scale_x_log10()
pdf('graphs.pdf')
print(plt1)
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
print(plt2)
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
dev.off()
## quartz_off_screen
## 2