APPENDIX – MODIFIED PROGRAMS IN THE CPACOR WITH AN APPLICATION
The following programs are the programs in Lehne et al. [95] with some modifications and an application to a GEO IDAT data set at https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi? acc=GSE69636. The following program can also be downloaded from the author’s website, https: //www. memphis. edu/sph/contact/faculty_ profiles/zhang.php.
# l\_IntensitiesRev.R
# Retrieves signal-intensities from Illumina idat-files. Returns
# detection p-values and signal intensities for genomic CpGs
# and control probes. Originally written by Benjamin Lehne
# (Imperial College London) and Alexander Drong (Oxford University).
# Modified by Hongmei Zhang (University of Memphis) with an
# application to a GEO dataset.
require(minfi)
require(IlluminaHumanMethylat ion450kmanifest)
#read data
rgSet = read.metharray. expO'idat/" , verbose=TRUE)
RGset = bgcorrect.illumina(rgSet)# Illumina background subtraction
# Type II probes
Typell.Name = getProbelnfo(RGset, type = "II")$Name
Typell.Green = getGreen(RGset)[getProbelnfo(RGset, type = "II")
SAddress,]
Typell.Red = getRed(RGset)[getProbelnfo(RGset, type = "II") SAddress,]
colnames(Typell.Red) = sampleNames(RGset) colnames(Typell.Green) = sampleNames(RGset)
# Type I probes, split into green and red channels
Typel.Green.Name = getProbelnfo(RGset, type = "I-Green")$Name Typel.Green.M = getGreen(RGset)[getProbelnfo(RGset, type =
"I-Green")$AddressB,]
rownames(Typel.Green.M) = Typel.Green.Name colnames(Typel.Green.M) = sampleNames(RGset)
Typel.Green.U = getGreen(RGset)[getProbelnfo(RGset, type = "I-Green")$AddressA,] rownames(Typel.Green.U) = Typel.Green.Name colnames(Typel.Green.U) = sampleNames(RGset)
Typel.Red.Name = getProbelnfo(RGset, type = "I-Red")$Name Typel.Red.M = getRed(RGset)[getProbelnfo(RGset, type = "I-Red") SAddressB,]
rownames(Typel.Red.M) = Typel.Red.Name colnames(Typel.Red.M) = sampleNames(RGset)
Typel.Red.U = getRed(RGset)[getProbelnfo(RGset, type = "I-Red") SAddressA,]
rownames(Typel.Red.U) = Typel.Red.Name colnames(Typel.Red.U) = sampleNames(RGset)
#BSC1 control probes
BSCI.Green.Name = getProbelnfo(RGset, type = "Control")[16:18,] SExtendedType
BSCI.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(BSCI.Green.Name), dimnames = list(BSCI.Green.Name, sampleNames(RGset)))
BSCI.Green [BSCI.Green.Name,] = getGreen(RGset)[getProbelnfo(RGset, type = "Control")[16:18,]SAddress,]
BSCI.Red.Name = getProbelnfo(RGset, type = "Control")[22:24,] SExtendedType
BSCI.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(BSCI.Red.Name),
dimnames = list(BSCI.Red.Name, sampleNames(RGset)))
BSCI.Red[BSCI.Red.Name,] = getRed(RGset)[getProbelnfo(RGset, type
"Control")[22:24,]SAddress,]
#BSC2 control probes
BSCII.Red.Name = getProbelnfoCRGset, type = "Control")[28:31,]
SExtendedType
BSCII.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCBSCII.Red.Name),
dimnames = listCBSCII.Red.Name, sampleNames(RGset)))
BSCII.Red[BSCII.Red.Name,] = getRed(RGset)[getProbelnfoCRGset, type = "Control")[28:31,]SAddress,]
«STAINING
stain.Red.Name =getProbeInfo(RGset, type = "Control") [2,]
SExtendedType
stain.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCstain.Red.Name),
dimnames = list(stain.Red.Name, sampleNames(RGset)))
stain.Red[stain.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,
type = "Control")[2,]$Address,]
stain.Green.Name = getProbelnfoCRGset, type = "Control")[4,]
SExtendedType
stain.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCstain.Green.Name),
dimnames = list(stain.Green.Name, sampleNames(RGset)))
stain.Green[stain.Green.Name,] = getGreen(RGset)[getProbelnfoCRGset,
type = "Control")[4,]$Address,]
«EXTENSION
extensionA.Red.Name =getProbeInfo(RGset, type = "Control")[7,]
SExtendedType
extensionA.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCextensionA.Red.Name),
dimnames = list(extensionA.Red.Name, sampleNames(RGset)))
extensionA.Red[extensionA.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,
type = "Control")[7,]$Address,]
extensionT.Red.Name =getProbeInfo(RGset, type = "Control")[8,]
SExtendedType
extensionT.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCextensionT.Red.Name),
dimnames = list(extensionT.Red.Name, sampleNames(RGset)))
extensionT.Red[extensionT.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,
type = "Control")[8,]$Address,]
extensionC.Green.Name = getProbelnfoCRGset, type = "Control")[9,]
SExtendedType
extensionC.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCextensionC.Green.Name),
dimnames = list(extensionC.Green.Name, sampleNames(RGset)))
extensionC.Green[extensionC.Green.Name,] = getGreen(RGset)[getProbelnfoCRGset,
type = "Control")[9,]$Address,]
extensionG.Green.Name = getProbelnfoCRGset, type = "Control") [10,] SExtendedType extensionG.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(extensionG.Green.Name), dimnames = list(extensionG.Green.Name, sampleNames(RGset))) extensionG.Green[extensionG.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset, type = "Control")[10,]$Address,]
«HYBRIDISATION
hybridH.Green.Name = getProbelnfoCRGset, type = "Control") [11,]
SExtendedType
hybridH.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(hybridH.Green.Name),
dimnames = list(hybridH.Green.Name, sampleNames(RGset)))
hybridH.Green[hybridH.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset,
type = "Control")[11,]$Address,]
hybridM.Green.Name = getProbeInfo(RGset, type = "Control")[12,]
SExtendedType
hybridM.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(hybridM.Green.Name),
dimnames = list(hybridM.Green.Name, sampleNames(RGset))) hybridM.Green[hybridM.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset, type = "Control")[12,]$Address,]
hybridL.Green.Name = getProbeInfo(RGset, type = "Control")[13,]
SExtendedType
hybridL.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(hybridL.Green.Name),
dimnames = list(hybridL.Green.Name, sampleNames(RGset)))
hybridL.Green[hybridL.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset,
type = "Control")[13,]$Address,]
«TARGET REMOVAL
target.Green.Name =getProbeInfo(RGset, type = "Control")[14:15,]
SExtendedType
target.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(target.Green.Name),
dimnames = list(target.Green.Name, sampleNames(RGset)))
target.Green[target.Green.Name,] = getGreen(RGset)[getProbelnfo(RGset,
type = "Control")[14:15,]SAddress,]
«Specificity I
sped .Green.Name =getProbeInf o(RGset, type = "Control") [32:34,]
SExtendedType
sped.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(specl.Green.Name),
dimnames = list (sped. Green. Name, sampleNames(RGset)))
sped. Green [sped. Green. Name,] = getGreen(RGset) [getProbeInfo(RGset,
type = "Control")[32:34,]$Address,] sped.Red.Name = getProbeInfo(RGset, type = "Control") [38:40,]
SExtendedType
sped.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(specl.Red.Name),
dimnames = list (sped. Red. Name, sampleNames(RGset))) sped .Red[sped .Red.Name,] = getRed(RGset) [getProbelnfо(RGset, type = "Control")[38:40,]$Address,]
#Specifidty II
specll.Red.Name = getProbelnfo(RGset, type = "Control")[44:46,] SExtendedType
specll.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCspecII.Red.Name),
dimnames = list(specll.Red.Name, sampleNames(RGset)))
specll.Red[specll.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,
type = "Control")[44:46,]$Address,]
#N0N POLYMORPHIC
np.Red.Name =getProbeInfo(RGset, type = "Control") [47:48,] SExtendedType
np.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCnp.Red.Name),
dimnames = list(np.Red.Name, sampleNames(RGset))) np.Red[np.Red.Name,] = getRed(RGset)[getProbelnfо(RGset, type = "Control")[47:48,]SAddress,]
np.Green.Name =getProbeInfo(RGset, type = "Control")[49:50,] SExtendedType
np.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(np.Green.Name),
dimnames = list(np.Green.Name, sampleNames(RGset)))
np.Green[np.Green.Name,] = getGreen(RGset)[getProbelnfо(RGset,
type = "Control")[49:50,]$Address,]
«Normalisation
control=getProbeInfo(RGset, type = "Control") normC.Green.Name=control[control[,2]==’N0RM_C’,4] normC.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normC.Green.Name),
dimnames = list(normC.Green.Name, sampleNames(RGset))) normC.Green[normC.Green.Name,] = getGreen(RGset)
[control [control[,2]==’N0RM_C’,1],] normG.Green.Name=control[control[,2]==’N0RM_G’,4] normG.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normG.Green.Name),
dimnames = list(normG.Green.Name, sampleNames(RGset))) normG.Green[normG.Green.Name,] = getGreen(RGset)
[control [control[,2]==’N0RM_G’,1],] normA.Red.Name=control[control[,2]==’N0RM_A’,4] normA.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normA.Red.Name),
dimnames = list(normA.Red.Name, sampleNames(RGset))) normA.Red[normA.Red.Name,] = getRed(RGset)
[control [control[,2]==’N0RM_A’,1],] normT.Red.Name=control[control[,2]==’N0RM_T’,4] normT.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normT.Red.Name), dimnames = list(normT.Red.Name, sampleNames(RGset))) normT.Red[normT.Red.Name,] = getRed(RGset)
[control[control[,2]==’N0RM_T’,1],]
#combine Ctrl probe intensities
Ctrl = rbind(as.matrix(BSCI.Green), as.matrixCBSCI.Red), as.matrix(BSCII.Red), (stain.Red), (stain.Green),
- (extensionA.Red), (extensionT.Red), (extensionC.Green),
- (extensionG.Green), (hybridH.Green), (hybridM.Green),
- (hybridL.Green),as.matrix(target.Green),as.matrix(sped.Green), as .matrix (sped .Red), as. matrix (sped I .Red), (np.Red[l,]), (np.Red[2,]), (np.Green[1,]),(np.Green[2,]), as.matrix(normC.Green), as.matrix(normG.Green), as.matrix(normA.Red), as.matrix(normT.Red))
«detection p-values
dp = detectionP(RGset, type = "m+u")
#add data for the new samples if (existsC'Typell .Red. All")) {
Typell.Red.All <- cbind(TypeII.Red.All.Typell.Red)
Typell.Green.All <- cbind(TypeII.Green.All.Typell.Green) TypeI.Red.M.All <- cbind(TypeI.Red.M.All.Typel.Red.M)
TypeI.Red.U.All <- cbind(TypeI.Red.U.All.Typel.Red.U)
Typel.Green.H.All <- cbind(TypeI.Green.M.All.Typel.Green.M)
Typel.Green.U.All <- cbind(TypeI.Green.U.All,Typel.Green.U)
Ctrl.all <- rbindCctrl.all, t(ctrl)) dp.all <- cbind(dp.all, dp)
>
else {
Typell.Red.All <- Typell.Red Typell.Green.All <- Typell.Green TypeI.Red.M.All <- Typel.Red.M TypeI.Red.U.All <- Typel.Red.U Typel.Green.M.All <- Typel.Green.M Typel.Green.U.All <- Typel.Green.U Ctrl.all <- t(ctrl) dp.all <- dp > #PCA of control-probe intensities pea <- prcomp(na.omit(ctrl.all)) ctrlprobes.scores = pca$x
colnames(ctrlprobes.scores) = paste(colnames(ctrlprobes.scores),
’_cp’, sep=’’)
save(Typell.Red.All ,Typell.Green.All .TypeI.Red.M.All .TypeI.Red.U.All Typel.Green.M.All ,Typel.Green.U.All , file="../intensities.RData") save(ctrl.all,ctrlprobes.scores, file = "../ctrlprobes.RData") save(dp.all, file = ".,/detectionPvalue.RData")
# 2\_betasRev.R
# Sets NAs based on the detection p-value and calculates marker and
# sample call-rates. Filters samples based on sample calle-rate.
# Performs Quantile Normalisation and Calculates Beta-values.
# Originallly written by Benjamin Lehne (Imperial College London) and
# Alexander Drong (Oxford University)
# Modified by Hongmei Zhang (University of Memphis) with an
# application to a GEO dataset
require(limma)
anno=read.csv("/GPL13534_HumanMethylation450_15017482_v.1.1.csv", as.is=TRUE, skip = 7)
anno=anno[,c(’Infinium_Design_Type’,’Color_Channel’, ’CHR’,
’MAPINFO’, ’Name’)]
cas=anno [substr(annoSName, l,3)==’ch.’ & ! (annoSCHR ‘/.in"/. c(’X’, ’Y’)),] cgs=anno[substr(annoSName, l,2)==’cg’& ! (annoSCHR ‘/.in‘/, c(’X’, ’Y’)),] auto = c(cgs$Name, cas$Name) auto=as.matrix(auto)
«detection p-value thres=lE-16
load(’intensities.RData’) load(’detectionPvalue.RData’)
d=dp.all [rownames (Typed .Green. A11) ,colnamesdypell .Green. All)]
Typed.Green. All.d = ifelse(d Typed .Red. A11 .d = if else(d d=dp.ad [rownames(Typel .Green.M. A11),colnamesdypel .Green.M. All)] Typel.Green.M.A11.d = ifelse(d Typel.Green.U.A11.d = ifelse(d Typel.Red.M.All.d = ifelse(d Typel.Red.U.A11.d = ifelse(d # autosomes ---------------------------------------------------------- samples=colnames(Typel.Red.M.A11) category=auto markers=as.matrix(intersect(rownames(TypelI.Green.A11.d), category)) Typed. Green = Typed. Green. A11 .d [markers, samples] Typed.Red = Typed .Red.A11 .d[markers,samples] markers=intersect(rownames(Typel.Green.M.A11.d), category) Typel.Green.M = Typel.Green.M.A11.d[markers.samples] Typel.Green.U = Typel.Green.U.A11.d[markers.samples] markers=intersect(rownames(Typel.Red.M.A11.d), category) Typel.Red.M = Typel.Red.M.A11.d[markers,samples] Typel.Red.U = Typel.Red.U.A11.d[markers,samples] #raw betas Typed.betas = Typed.Green/(Typed.Red+Typell .Green+100) Typel.Green.betas = Typel.Green.М/(Typel.Green.M+Typel.Green.U+100) Typel.Red.betas = Typel.Red.M/(Typel.Red.M+Typel.Red.U+100) beta = as .matrix (rbinddypel I .betas, Typel. Gr een. betas ,TypeI .Red. bet as)) sample.call=colSums(!is.na(beta))/nrow(beta) marker.call=rowSums(!is.na(beta))/ncol(beta) #save(sample.call, marker.call, file=’output/callRates.RData’) #save(beta, file=’output/beta_raw.RData’) #call-rate filtering callrate.thres=0.95 samples=names(sample.call[sample.call>callrate.thres] ) markers=as.matrix(intersect(rownames(TypelI.Green.All.d), category)) Typell.Green = Typell.Green.All.d [markers,samples] TypelI.Red = Typell.Red.All.d[markers,samples] markers=intersect(rownames(Typel.Green.M.All.d), category) Typel.Green.M = Typel.Green.M.All.d[markers.samples] Typel.Green.U = Typel.Green.U.All.d [markers.samples] markers=intersect(rownames(Typel.Red.M.All.d), category) Typel.Red.M = Typel.Red.M.All.d [markers,samples] Typel.Red.U = Typel.Red.U.All.d[markers,samples] rm(TypelI.Green.All.d,TypelI.Red.All.d,Typel.Green.M.All.d, Typel.Green.U.All.d,Typel.Red.M.All.d,Typel.Red.U.All.d) #QN Typell.Green=normalizeQuantiles(Typell.Green) TypelI.Red = normalizeQuantiles(Typell.Red) Typel.Green.M = normalizeQuantiles(Typel.Green.M) Typel.Green.U = normalizeQuantiles(Typel.Green.U) Typel.Red.M = normalizeQuantiles(Typel.Red.M) Typel.Red.U = normalizeQuantiles(Typel.Red.U) Typell.betas = Typell.Green/(Typell.Red+Typell.Green+100) Typel.Green.betas = Typel.Green.М/(Typel.Green.M+Typel.Green.U+100) Typel.Red.betas = Typel.Red.M/(Typel.Red.M+Typel.Red.U+100) beta = as.matrix(rbind(TypelI.betas,Typel.Green.betas,Typel.Red.betas)) rm(TypeII.Green,Typell.Red,Typel.Green.M,Typel.Green.U, Typel.Red.M,Typel.Red.U, Typell.betas,Typel.Green.betas,Typel.Red.betas) save(beta, file="beta_QN.RData") Screening