Contents
Note
- This is a tab-separated values (TSV) text file.
- This is the result coming from the cufflinks command for all samples.
- The RStudio server at FGCZ is available once you have a B-Fabric account.
Go to RStudio server
- http://fgcz-genomics.uzh.ch/
- and start a new project
- [File]-[New Project], select Existing Directory, and select the bio373_2020/ folder.
Load data (in R environment)
> data <- read.table("kam_fpkm.tsv", header=T, row.names=1)
> head(data)
Correlation matrix
> cor(data)
Scatter plot
> plot(data$H1, data$H2)
> plot(data$H1, data$L1)
> plot(data$H1, data$H2, log="xy", xlim=c(0.001,100000), ylim=c(0.001, 100000))
> plot(data$H1, data$L1, log="xy", xlim=c(0.001,100000), ylim=c(0.001, 100000))
Note
- plot(x-axis data, y-axis data)
- log="xy": use a logarithmic scale on both the x- and y-axes
- xlim: range of values shown on the x-axis
- ylim: range of values shown on the y-axis
Boxplot
> data <- read.table("kam_fpkm.tsv", header=T, row.names=1)
> head(data)
> hal <- t(data["AT5G16980",1:2])
> lyr <- t(data["AT5G16980",3:4])
> tb <- cbind(hal, lyr)
> colnames(tb) <- c("H", "L")
> boxplot(tb, ylab="FPKM", main="AT5G16980")
Scatter plot matrix
> pairs(data)
Plot the scatter plot matrix and the correlation matrix together
> library(psych)
> pairs.panels(data)
Load data (in R environment)
> data <- read.table("kam_fpkm.tsv", header=T, row.names=1)
> head(data)
> distance <- dist(t(data))
> distance
> clustering <- hclust(distance)
> plot(clustering)
> data.log2 <- log2(data+1)
> distance <- dist(t(data.log2))
> clustering <- hclust(distance)
> plot(clustering)
Question4
- What is the difference between the clustering results with and without the log2 scaling (log2(data+1))?
> library(ezRun)
> ezMdsPlot(data, sampleColors = c("red", "blue", "green", "yellow"), main="MDS plot")
Reference