Skip to contents

Collapse clusters if the Jaccard index between clusters exceeds a cutoff

Usage

collapseClusters(treeListClusters, featurePositions, jaccardCutoff = 0.9)

Arguments

treeListClusters

clusters returned by createClusters(), optionally filtered with filterClusters() as in the example below

featurePositions

GRanges object storing location of each feature

jaccardCutoff

cutoff value for the Jaccard index; clusters whose Jaccard index exceeds this value are collapsed (default 0.9)

Value

subset of clusters in treeListClusters that remains after clusters whose Jaccard index exceeds the cutoff are collapsed

Examples

library(GenomicRanges)
#> Loading required package: stats4
#> Loading required package: S4Vectors
#> 
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:utils’:
#> 
#>     findMatches
#> The following objects are masked from ‘package:base’:
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
library(BiocParallel)

# load data
data('decorateData')

# Evaluate hierarchical clustering
# adjacentCount (not set here, so its default is used) is the number of adjacent peaks considered in correlation
treeList = runOrderedClusteringGenome( simData, simLocation)
#> 
#> Evaluating:chr20
#> 

# Choose cutoffs and return clusters
treeListClusters = createClusters( treeList, method = "meanClusterSize", meanClusterSize=c( 10, 20, 30, 40, 50) )
#> Method:meanClusterSize

# Evaluate strength of correlation for each cluster
clstScore = scoreClusters(treeList, treeListClusters, BPPARAM = SerialParam() )
#> Evaluating strength of each cluster...
#> 
#> Dividing work into 5 chunks...

# Filter to retain only strong clusters
clustInclude = retainClusters( clstScore, "LEF", 0.30 )
#> Using cutoffs:
#> Cluster set	cutoff
#>  10		0.3
#>  20		0.3
#>  30		0.3
#>  40		0.3
#>  50		0.3
#> 

# get retained clusters
treeListClusters_filter = filterClusters( treeListClusters, clustInclude)

# collapse similar clusters
treeListClusters_collapse = collapseClusters( treeListClusters_filter, simLocation)
#> Identifying redundant clusters...