M2 BIBS, TP du cours d'analyse de données génomiques
2014-2015
Voir ici (commencer par le README.md)
Installation du package 'jointseg' depuis R:
install.packages("jointseg", repos = "http://R-Forge.R-project.org")
install.packages("cghseg")
source("http://bioconductor.org/biocLite.R")
biocLite("DNAcopy")
gamma <- rep(c(2, 3, 2, 1),
times=c(100, 200, 300, 400)*100)
len <- length(gamma)
set.seed(1)
y <- rnorm(len, mean=gamma)
plot(y)
lines(gamma, col=2, lwd=2)
library("cghseg")
#simul = simulprofiles(M=5,n=100,k.mean=2,SNR=5,lambda=1)
CGHd <- new("CGHdata",Y=y)
CGHo <- new("CGHoptions")
system.time(CGHr <- uniseg(CGHd,CGHo))
system.time(res <- cghseg:::segmeanCO(y, 5))
system.time(res <- cghseg:::segmeanCO(y, 100))
summary(CGHr)
bkp <- getbp(CGHr)$Y
which(bkp$bp==1)
## Questions:
## - refaire la segmentation avec un 'y' plus difficile
## - segmenter avec la fonction 'segment' du package "DNAcopy"
library("DNAcopy")
chrom <- rep(1, len)
maploc <- 1:len
system.time(res <- segment(CNA(y, chrom, maploc)))
res$output
bkpCBS <- res$output$loc.end
bkpCBS <- bkpCBS[1:(length(bkpCBS)-1)]
plot(y)
lines(gamma, col=2, lwd=2)
abline(v=which(bkp$bp==1), col="cyan", lwd=2)
abline(v=bkpCBS, col="orange", lwd=2, lty=3)
## - comment dire quelle est la meilleure segmentation ?
library(jointseg)
## fused lasso
system.time(resFL <- doGFLars(y, K=100))
bkpFL <- resFL$bkp
## binary segmentation
system.time(resBS <- doRBS(y, K=100))
bkpBS <- resBS$bkp
bkpBS
plot(y)
lines(gamma, col=2, lwd=2)
abline(v=which(bkp$bp==1), col="cyan", lwd=2)
abline(v=bkpCBS, col="orange", lwd=2, lty=3)
abline(v=bkpFL[1:3], col="blue", lwd=3, lty=3)
abline(v=bkpBS[1:3], col="yellow", lwd=3, lty=3)
## FL+DP
resJ <- jointSeg(y, K=100, method="GFLars")
resJ$dpBkpList[[3]] ## best segmentation with 3 breakpoints
## BS+DP
resJBS <- jointSeg(y, K=100, method="RBS")
resJBS$dpBkpList[[3]] ## best segmentation with 3 breakpoints
resJBS$bestBkp