Evaluate Differential Correlation — evalDiffCorr • decorate

Evaluate Differential Correlation between two subsets of data

Usage

evalDiffCorr(
  epiSignal,
  testVariable,
  gRanges,
  clustList,
  npermute = c(100, 10000, 1e+05),
  adj.beta = 0,
  rho = 0,
  sumabs.seq = 1,
  BPPARAM = bpparam(),
  method = c("sLED", "Box", "Box.permute", "Steiger.fisher", "Steiger", "Jennrich",
    "Factor", "Mann.Whitney", "Kruskal.Wallis", "Cai.max", "Chang.maxBoot", "LC.U",
    "WL.randProj", "Schott.Frob", "Delaneau", "deltaSLE"),
  method.corr = c("pearson", "kendall", "spearman")
)

# S4 method for EList,ANY,GRanges,list
evalDiffCorr(
  epiSignal,
  testVariable,
  gRanges,
  clustList,
  npermute = c(100, 10000, 1e+05),
  adj.beta = 0,
  rho = 0,
  sumabs.seq = 1,
  BPPARAM = bpparam(),
  method = c("sLED", "Box", "Box.permute", "Steiger.fisher", "Steiger", "Jennrich",
    "Factor", "Mann.Whitney", "Kruskal.Wallis", "Cai.max", "Chang.maxBoot", "LC.U",
    "WL.randProj", "Schott.Frob", "Delaneau", "deltaSLE"),
  method.corr = c("pearson", "kendall", "spearman")
)

# S4 method for matrix,ANY,GRanges,list
evalDiffCorr(
  epiSignal,
  testVariable,
  gRanges,
  clustList,
  npermute = c(100, 10000, 1e+05),
  adj.beta = 0,
  rho = 0,
  sumabs.seq = 1,
  BPPARAM = bpparam(),
  method = c("sLED", "Box", "Box.permute", "Steiger.fisher", "Steiger", "Jennrich",
    "Factor", "Mann.Whitney", "Kruskal.Wallis", "Cai.max", "Chang.maxBoot", "LC.U",
    "WL.randProj", "Schott.Frob", "Delaneau", "deltaSLE"),
  method.corr = c("pearson", "kendall", "spearman")
)

# S4 method for data.frame,ANY,GRanges,list
evalDiffCorr(
  epiSignal,
  testVariable,
  gRanges,
  clustList,
  npermute = c(100, 10000, 1e+05),
  adj.beta = 0,
  rho = 0,
  sumabs.seq = 1,
  BPPARAM = bpparam(),
  method = c("sLED", "Box", "Box.permute", "Steiger.fisher", "Steiger", "Jennrich",
    "Factor", "Mann.Whitney", "Kruskal.Wallis", "Cai.max", "Chang.maxBoot", "LC.U",
    "WL.randProj", "Schott.Frob", "Delaneau", "deltaSLE"),
  method.corr = c("pearson", "kendall", "spearman")
)

Arguments

epiSignal: matrix or EList of epigenetic signal. Rows are features and columns are samples
testVariable: factor indicating two subsets of the samples to compare
gRanges: GenomicRanges corresponding to the rows of epiSignal
clustList: list of cluster assignments
npermute: array of increasing permutation counts, ranging from the min to the max number of permutations (the default, c(100, 10000, 1e+05), has three entries)
adj.beta: parameter for sLED
rho: a large positive constant such that A(X)-A(Y)+diag(rep(rho,p)) is positive definite, where p is the number of features
sumabs.seq: sparsity parameter
BPPARAM: parameters for parallel evaluation
method: "sLED", "Box", "Box.permute", "Steiger.fisher", "Steiger", "Jennrich", "Factor", "Mann.Whitney", "Kruskal.Wallis", "Cai.max", "Chang.maxBoot", "LC.U", "WL.randProj", "Schott.Frob", "Delaneau", "deltaSLE"
method.corr: Specify type of correlation: "pearson", "kendall", "spearman"

Value

list of results by chromosome and clustList

Details

Correlation structure between two subsets of the data is evaluated with the sparse-Leading-Eigenvalue-Driven (sLED) test:

Zhu, Lingxue, Jing Lei, Bernie Devlin, and Kathryn Roeder. 2017. Testing high-dimensional covariance matrices, with application to detecting schizophrenia risk genes. Annals of Applied Statistics. 11:3 1810-1831. doi:10.1214/17-AOAS1062

Examples

library(GenomicRanges)
library(EnsDb.Hsapiens.v86)

# load data
data('decorateData')

# load gene locations
ensdb = EnsDb.Hsapiens.v86

# Evaluate hierarchical clustering
treeList = runOrderedClusteringGenome( simData, simLocation ) 
#> 
Evaluating:chr20          
#> 

# Choose cutoffs and return clusters
treeListClusters = createClusters( treeList, method = "meanClusterSize", meanClusterSize=c( 10, 20) )
#> Method:meanClusterSize

# Plot correlations and clusters in region defined by query
query = range(simLocation)

# Plot clusters
plotDecorate( ensdb, treeList, treeListClusters, simLocation, query)


# Evaluate Differential Correlation between two subsets of data
sledRes = evalDiffCorr( simData, metadata$Disease, simLocation, treeListClusters, npermute=c(20, 200, 2000))
#> Note that clusters of 2 or fewer features are omitted from analysis
#> 
#> # Clusters:67
#> Initial pass through all clusters...
#> Intensive second pass...
#> Combining results...
#> 

# get summary of results
df = summary( sledRes )

# print results
head(df)
#>   id chrom cluster       pValue       stat n.perm    p.adjust
#> 1 20 chr20       4 0.0004997501 -2.1949654   2000 0.006163585
#> 2 10 chr20       4 0.0004997501 -0.7501589   2000 0.006163585
#> 3 20 chr20       2 0.0004997501 -0.7501589   2000 0.006163585
#> 4 10 chr20      14 0.1343283582  0.1434676    200 0.813186813
#> 5 10 chr20      40 0.1592039801 -0.3653258    200 0.813186813
#> 6 20 chr20      19 0.1592039801 -0.3653258    200 0.813186813

# extract peak IDs from most significant cluster
peakIDs = getFeaturesInCluster( treeListClusters, df$chrom[1], df$cluster[1], "20")

# plot comparison of correlation matrices for peaks in peakIDs
#  where data is subset by metadata$Disease
main = paste0(df$chrom[1], ': cluster ', df$cluster[1])
plotCompareCorr( simData, peakIDs, metadata$Disease) + ggtitle(main)