APPENDIX – MODIFIED PROGRAMS IN THE CPACOR WITH AN APPLICATION

The following programs are the programs in Lehne et al. [95] with some modifications and an application to a GEO IDAT data set at https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi? acc=GSE69636. The following program can also be downloaded from the author’s website, https: //www. memphis. edu/sph/contact/faculty_ profiles/zhang.php.

# l\_IntensitiesRev.R

# Retrieves signal-intensities from Illumina idat-files. Returns

# detection p-values and signal intensities for genomic CpGs

# and control probes. Originally written by Benjamin Lehne

# (Imperial College London) and Alexander Drong (Oxford University).

# Modified by Hongmei Zhang (University of Memphis) with an

# application to a GEO dataset.

require(minfi)

require(IlluminaHumanMethylat ion450kmanifest)

#read data

rgSet = read.metharray. expO'idat/" , verbose=TRUE)

RGset = bgcorrect.illumina(rgSet)# Illumina background subtraction

# Type II probes

Typell.Name = getProbelnfo(RGset, type = "II")$Name

Typell.Green = getGreen(RGset)[getProbelnfo(RGset, type = "II")

SAddress,]

Typell.Red = getRed(RGset)[getProbelnfo(RGset, type = "II") SAddress,]

colnames(Typell.Red) = sampleNames(RGset) colnames(Typell.Green) = sampleNames(RGset)

# Type I probes, split into green and red channels

Typel.Green.Name = getProbelnfo(RGset, type = "I-Green")$Name Typel.Green.M = getGreen(RGset)[getProbelnfo(RGset, type =

"I-Green")$AddressB,]

rownames(Typel.Green.M) = Typel.Green.Name colnames(Typel.Green.M) = sampleNames(RGset)

Typel.Green.U = getGreen(RGset)[getProbelnfo(RGset, type = "I-Green")$AddressA,] rownames(Typel.Green.U) = Typel.Green.Name colnames(Typel.Green.U) = sampleNames(RGset)

Typel.Red.Name = getProbelnfo(RGset, type = "I-Red")$Name Typel.Red.M = getRed(RGset)[getProbelnfo(RGset, type = "I-Red") SAddressB,]

rownames(Typel.Red.M) = Typel.Red.Name colnames(Typel.Red.M) = sampleNames(RGset)

Typel.Red.U = getRed(RGset)[getProbelnfo(RGset, type = "I-Red") SAddressA,]

rownames(Typel.Red.U) = Typel.Red.Name colnames(Typel.Red.U) = sampleNames(RGset)

#BSC1 control probes

BSCI.Green.Name = getProbelnfo(RGset, type = "Control")[16:18,] SExtendedType

BSCI.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(BSCI.Green.Name), dimnames = list(BSCI.Green.Name, sampleNames(RGset)))

BSCI.Green [BSCI.Green.Name,] = getGreen(RGset)[getProbelnfo(RGset, type = "Control")[16:18,]SAddress,]

BSCI.Red.Name = getProbelnfo(RGset, type = "Control")[22:24,] SExtendedType

BSCI.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(BSCI.Red.Name),

dimnames = list(BSCI.Red.Name, sampleNames(RGset)))

BSCI.Red[BSCI.Red.Name,] = getRed(RGset)[getProbelnfo(RGset, type

"Control")[22:24,]SAddress,]

#BSC2 control probes

BSCII.Red.Name = getProbelnfoCRGset, type = "Control")[28:31,]

SExtendedType

BSCII.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCBSCII.Red.Name),

dimnames = listCBSCII.Red.Name, sampleNames(RGset)))

BSCII.Red[BSCII.Red.Name,] = getRed(RGset)[getProbelnfoCRGset, type = "Control")[28:31,]SAddress,]

«STAINING

stain.Red.Name =getProbeInfo(RGset, type = "Control") [2,]

SExtendedType

stain.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCstain.Red.Name),

dimnames = list(stain.Red.Name, sampleNames(RGset)))

stain.Red[stain.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,

type = "Control")[2,]$Address,]

stain.Green.Name = getProbelnfoCRGset, type = "Control")[4,]

SExtendedType

stain.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCstain.Green.Name),

dimnames = list(stain.Green.Name, sampleNames(RGset)))

stain.Green[stain.Green.Name,] = getGreen(RGset)[getProbelnfoCRGset,

type = "Control")[4,]$Address,]

«EXTENSION

extensionA.Red.Name =getProbeInfo(RGset, type = "Control")[7,]

SExtendedType

extensionA.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCextensionA.Red.Name),

dimnames = list(extensionA.Red.Name, sampleNames(RGset)))

extensionA.Red[extensionA.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,

type = "Control")[7,]$Address,]

extensionT.Red.Name =getProbeInfo(RGset, type = "Control")[8,]

SExtendedType

extensionT.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCextensionT.Red.Name),

dimnames = list(extensionT.Red.Name, sampleNames(RGset)))

extensionT.Red[extensionT.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,

type = "Control")[8,]$Address,]

extensionC.Green.Name = getProbelnfoCRGset, type = "Control")[9,]

SExtendedType

extensionC.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCextensionC.Green.Name),

dimnames = list(extensionC.Green.Name, sampleNames(RGset)))

extensionC.Green[extensionC.Green.Name,] = getGreen(RGset)[getProbelnfoCRGset,

type = "Control")[9,]$Address,]

extensionG.Green.Name = getProbelnfoCRGset, type = "Control") [10,] SExtendedType extensionG.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(extensionG.Green.Name), dimnames = list(extensionG.Green.Name, sampleNames(RGset))) extensionG.Green[extensionG.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset, type = "Control")[10,]$Address,]

«HYBRIDISATION

hybridH.Green.Name = getProbelnfoCRGset, type = "Control") [11,]

SExtendedType

hybridH.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(hybridH.Green.Name),

dimnames = list(hybridH.Green.Name, sampleNames(RGset)))

hybridH.Green[hybridH.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset,

type = "Control")[11,]$Address,]

hybridM.Green.Name = getProbeInfo(RGset, type = "Control")[12,]

SExtendedType

hybridM.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(hybridM.Green.Name),

dimnames = list(hybridM.Green.Name, sampleNames(RGset))) hybridM.Green[hybridM.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset, type = "Control")[12,]$Address,]

hybridL.Green.Name = getProbeInfo(RGset, type = "Control")[13,]

SExtendedType

hybridL.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(hybridL.Green.Name),

dimnames = list(hybridL.Green.Name, sampleNames(RGset)))

hybridL.Green[hybridL.Green.Name,] = getGreen(RGset)[getProbeInfo(RGset,

type = "Control")[13,]$Address,]

«TARGET REMOVAL

target.Green.Name =getProbeInfo(RGset, type = "Control")[14:15,]

SExtendedType

target.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(target.Green.Name),

dimnames = list(target.Green.Name, sampleNames(RGset)))

target.Green[target.Green.Name,] = getGreen(RGset)[getProbelnfo(RGset,

type = "Control")[14:15,]SAddress,]

«Specificity I

sped .Green.Name =getProbeInf o(RGset, type = "Control") [32:34,]

SExtendedType

sped.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(specl.Green.Name),

dimnames = list (sped. Green. Name, sampleNames(RGset)))

sped. Green [sped. Green. Name,] = getGreen(RGset) [getProbeInfo(RGset,

type = "Control")[32:34,]$Address,] sped.Red.Name = getProbeInfo(RGset, type = "Control") [38:40,]

SExtendedType

sped.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(specl.Red.Name),

dimnames = list (sped. Red. Name, sampleNames(RGset))) sped .Red[sped .Red.Name,] = getRed(RGset) [getProbelnfо(RGset, type = "Control")[38:40,]$Address,]

#Specifidty II

specll.Red.Name = getProbelnfo(RGset, type = "Control")[44:46,] SExtendedType

specll.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCspecII.Red.Name),

dimnames = list(specll.Red.Name, sampleNames(RGset)))

specll.Red[specll.Red.Name,] = getRed(RGset)[getProbelnfoCRGset,

type = "Control")[44:46,]$Address,]

#N0N POLYMORPHIC

np.Red.Name =getProbeInfo(RGset, type = "Control") [47:48,] SExtendedType

np.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = lengthCnp.Red.Name),

dimnames = list(np.Red.Name, sampleNames(RGset))) np.Red[np.Red.Name,] = getRed(RGset)[getProbelnfо(RGset, type = "Control")[47:48,]SAddress,]

np.Green.Name =getProbeInfo(RGset, type = "Control")[49:50,] SExtendedType

np.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(np.Green.Name),

dimnames = list(np.Green.Name, sampleNames(RGset)))

np.Green[np.Green.Name,] = getGreen(RGset)[getProbelnfо(RGset,

type = "Control")[49:50,]$Address,]

«Normalisation

control=getProbeInfo(RGset, type = "Control") normC.Green.Name=control[control[,2]==’N0RM_C’,4] normC.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normC.Green.Name),

dimnames = list(normC.Green.Name, sampleNames(RGset))) normC.Green[normC.Green.Name,] = getGreen(RGset)

[control [control[,2]==’N0RM_C’,1],] normG.Green.Name=control[control[,2]==’N0RM_G,4] normG.Green = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normG.Green.Name),

dimnames = list(normG.Green.Name, sampleNames(RGset))) normG.Green[normG.Green.Name,] = getGreen(RGset)

[control [control[,2]==’N0RM_G’,1],] normA.Red.Name=control[control[,2]==’N0RM_A’,4] normA.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normA.Red.Name),

dimnames = list(normA.Red.Name, sampleNames(RGset))) normA.Red[normA.Red.Name,] = getRed(RGset)

[control [control[,2]==’N0RM_A’,1],] normT.Red.Name=control[control[,2]==’N0RM_T’,4] normT.Red = matrix(NA_real_, ncol = ncol(RGset), nrow = length(normT.Red.Name), dimnames = list(normT.Red.Name, sampleNames(RGset))) normT.Red[normT.Red.Name,] = getRed(RGset)

[control[control[,2]==’N0RM_T’,1],]

#combine Ctrl probe intensities

Ctrl = rbind(as.matrix(BSCI.Green), as.matrixCBSCI.Red), as.matrix(BSCII.Red), (stain.Red), (stain.Green),

  • (extensionA.Red), (extensionT.Red), (extensionC.Green),
  • (extensionG.Green), (hybridH.Green), (hybridM.Green),
  • (hybridL.Green),as.matrix(target.Green),as.matrix(sped.Green), as .matrix (sped .Red), as. matrix (sped I .Red), (np.Red[l,]), (np.Red[2,]), (np.Green[1,]),(np.Green[2,]), as.matrix(normC.Green), as.matrix(normG.Green), as.matrix(normA.Red), as.matrix(normT.Red))

«detection p-values

dp = detectionP(RGset, type = "m+u")

#add data for the new samples if (existsC'Typell .Red. All")) {

Typell.Red.All <- cbind(TypeII.Red.All.Typell.Red)

Typell.Green.All <- cbind(TypeII.Green.All.Typell.Green) TypeI.Red.M.All <- cbind(TypeI.Red.M.All.Typel.Red.M)

TypeI.Red.U.All <- cbind(TypeI.Red.U.All.Typel.Red.U)

Typel.Green.H.All <- cbind(TypeI.Green.M.All.Typel.Green.M)

Typel.Green.U.All <- cbind(TypeI.Green.U.All,Typel.Green.U)

Ctrl.all <- rbindCctrl.all, t(ctrl)) dp.all <- cbind(dp.all, dp)

>

else {

Typell.Red.All <- Typell.Red Typell.Green.All <- Typell.Green TypeI.Red.M.All <- Typel.Red.M TypeI.Red.U.All <- Typel.Red.U Typel.Green.M.All <- Typel.Green.M Typel.Green.U.All <- Typel.Green.U Ctrl.all <- t(ctrl) dp.all <- dp > #PCA of control-probe intensities pea <- prcomp(na.omit(ctrl.all)) ctrlprobes.scores = pca$x

colnames(ctrlprobes.scores) = paste(colnames(ctrlprobes.scores),

’_cp’, sep=’’)

save(Typell.Red.All ,Typell.Green.All .TypeI.Red.M.All .TypeI.Red.U.All Typel.Green.M.All ,Typel.Green.U.All , file="../intensities.RData") save(ctrl.all,ctrlprobes.scores, file = "../ctrlprobes.RData") save(dp.all, file = ".,/detectionPvalue.RData")

# 2\_betasRev.R

# Sets NAs based on the detection p-value and calculates marker and

# sample call-rates. Filters samples based on sample calle-rate.

# Performs Quantile Normalisation and Calculates Beta-values.

# Originallly written by Benjamin Lehne (Imperial College London) and

# Alexander Drong (Oxford University)

# Modified by Hongmei Zhang (University of Memphis) with an

# application to a GEO dataset

require(limma)

anno=read.csv("/GPL13534_HumanMethylation450_15017482_v.1.1.csv", as.is=TRUE, skip = 7)

anno=anno[,c(’Infinium_Design_Type’,’Color_Channel’, ’CHR’,

’MAPINFO’, ’Name’)]

cas=anno [substr(annoSName, l,3)==’ch.’ & ! (annoSCHR ‘/.in"/. c(’X’, ’Y’)),] cgs=anno[substr(annoSName, l,2)==’cg’& ! (annoSCHR ‘/.in‘/, c(’X’, ’Y’)),] auto = c(cgs$Name, cas$Name) auto=as.matrix(auto)

«detection p-value thres=lE-16

load(’intensities.RData’) load(’detectionPvalue.RData’)

d=dp.all [rownames (Typed .Green. A11) ,colnamesdypell .Green. All)]

Typed.Green. All.d = ifelse(d

Typed .Red. A11 .d = if else(d

d=dp.ad [rownames(Typel .Green.M. A11),colnamesdypel .Green.M. All)]

Typel.Green.M.A11.d = ifelse(d

Typel.Green.U.A11.d = ifelse(d

Typel.Red.M.All.d = ifelse(d

Typel.Red.U.A11.d = ifelse(d

# autosomes ----------------------------------------------------------

samples=colnames(Typel.Red.M.A11)

category=auto

markers=as.matrix(intersect(rownames(TypelI.Green.A11.d), category)) Typed. Green = Typed. Green. A11 .d [markers, samples]

Typed.Red = Typed .Red.A11 .d[markers,samples] markers=intersect(rownames(Typel.Green.M.A11.d), category)

Typel.Green.M = Typel.Green.M.A11.d[markers.samples]

Typel.Green.U = Typel.Green.U.A11.d[markers.samples] markers=intersect(rownames(Typel.Red.M.A11.d), category)

Typel.Red.M = Typel.Red.M.A11.d[markers,samples]

Typel.Red.U = Typel.Red.U.A11.d[markers,samples]

#raw betas

Typed.betas = Typed.Green/(Typed.Red+Typell .Green+100)

Typel.Green.betas = Typel.Green.М/(Typel.Green.M+Typel.Green.U+100) Typel.Red.betas = Typel.Red.M/(Typel.Red.M+Typel.Red.U+100) beta = as .matrix (rbinddypel I .betas, Typel. Gr een. betas ,TypeI .Red. bet as)) sample.call=colSums(!is.na(beta))/nrow(beta) marker.call=rowSums(!is.na(beta))/ncol(beta)

#save(sample.call, marker.call, file=’output/callRates.RData’) #save(beta, file=’output/beta_raw.RData’)

#call-rate filtering callrate.thres=0.95

samples=names(sample.call[sample.call>callrate.thres] )

markers=as.matrix(intersect(rownames(TypelI.Green.All.d), category))

Typell.Green = Typell.Green.All.d [markers,samples]

TypelI.Red = Typell.Red.All.d[markers,samples] markers=intersect(rownames(Typel.Green.M.All.d), category)

Typel.Green.M = Typel.Green.M.All.d[markers.samples]

Typel.Green.U = Typel.Green.U.All.d [markers.samples] markers=intersect(rownames(Typel.Red.M.All.d), category)

Typel.Red.M = Typel.Red.M.All.d [markers,samples]

Typel.Red.U = Typel.Red.U.All.d[markers,samples]

rm(TypelI.Green.All.d,TypelI.Red.All.d,Typel.Green.M.All.d,

Typel.Green.U.All.d,Typel.Red.M.All.d,Typel.Red.U.All.d)

#QN

Typell.Green=normalizeQuantiles(Typell.Green)

TypelI.Red = normalizeQuantiles(Typell.Red)

Typel.Green.M = normalizeQuantiles(Typel.Green.M)

Typel.Green.U = normalizeQuantiles(Typel.Green.U)

Typel.Red.M = normalizeQuantiles(Typel.Red.M)

Typel.Red.U = normalizeQuantiles(Typel.Red.U)

Typell.betas = Typell.Green/(Typell.Red+Typell.Green+100)

Typel.Green.betas = Typel.Green.М/(Typel.Green.M+Typel.Green.U+100)

Typel.Red.betas = Typel.Red.M/(Typel.Red.M+Typel.Red.U+100)

beta = as.matrix(rbind(TypelI.betas,Typel.Green.betas,Typel.Red.betas))

rm(TypeII.Green,Typell.Red,Typel.Green.M,Typel.Green.U,

Typel.Red.M,Typel.Red.U, Typell.betas,Typel.Green.betas,Typel.Red.betas) save(beta, file="beta_QN.RData")

Screening

 
Source
< Prev   CONTENTS   Source   Next >