Fix CollectHSMetrics - Don't use Coverage Cap on fold score #1913

Merged
@@ -699,7 +699,9 @@ public void finish() {
 
     /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */
     private void calculateTargetCoverageMetrics() {
-        final long[] highQualityCoverageHistogramArray = new long[coverageCap+1];
+        // use a HashMap because the size of this histogram is defined by the sparse coverage depths.
+        final HashMap<Integer, Long> fullHighQualityDepthHistogramMap = new HashMap<>();
+
         int zeroCoverageTargets = 0;
 
         // the number of bases we counted towards the depth histogram plus those that got thrown out by the coverage cap
@@ -723,15 +725,17 @@ private void calculateTargetCoverageMetrics() {
         for (final Coverage c : this.highQualityCoverageByTarget.values()) {
             if (!c.hasCoverage()) {
                 zeroCoverageTargets++;
-                highQualityCoverageHistogramArray[0] += c.interval.length();
+                fullHighQualityDepthHistogramMap.put(0, (long) c.interval.length());
                 targetBases[0] += c.interval.length();
                 minDepth = 0;
                 continue;
             }
 
             for (final int depth : c.getDepths()) {
                 totalCoverage += depth;
-                highQualityCoverageHistogramArray[Math.min(depth, coverageCap)]++;
+                Long index = fullHighQualityDepthHistogramMap.getOrDefault(depth, 0L);
+                index = index + 1;
+                fullHighQualityDepthHistogramMap.put(depth, index);
                 maxDepth = Math.max(maxDepth, depth);
                 minDepth = Math.min(minDepth, depth);
 
@@ -747,11 +751,10 @@ private void calculateTargetCoverageMetrics() {
             throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases");
         }
 
-        for (int i = 0; i < highQualityCoverageHistogramArray.length; ++i) {
-            highQualityDepthHistogram.increment(i, highQualityCoverageHistogramArray[i]);
+        for (Map.Entry<Integer, Long> entry : fullHighQualityDepthHistogramMap.entrySet()) {
+            highQualityDepthHistogram.increment(entry.getKey(), entry.getValue());
         }
 
-        // we do this instead of highQualityDepthHistogram.getMean() because the histogram imposes a coverage cap
        metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY;
        metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian();
        metrics.MAX_TARGET_COVERAGE = maxDepth;
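The substance of the change: the old code tallied each base's depth into a fixed-size array indexed by Math.min(depth, coverageCap), so every metric later derived from the histogram, including FOLD_80_BASE_PENALTY, silently saturated at the coverage cap. Tallying into a map keyed by the true depth keeps the full histogram while using memory proportional to the number of distinct depths observed. Below is a minimal standalone sketch of that sparse-tally technique, illustrative only and not Picard's code; the class and variable names are invented:

import java.util.HashMap;
import java.util.Map;

// Illustrative sketch only: tally per-base depths into a sparse histogram
// keyed by the true depth, so no value is clamped to a coverage cap.
public class SparseDepthHistogramSketch {
    public static void main(String[] args) {
        final int[] depths = {5, 5, 12, 250_000, 5};  // hypothetical per-base depths
        final Map<Integer, Long> histogram = new HashMap<>();
        long totalCoverage = 0;
        for (final int depth : depths) {
            totalCoverage += depth;
            // one-liner equivalent of the getOrDefault/put pattern in the diff
            histogram.merge(depth, 1L, Long::sum);
        }
        final double mean = (double) totalCoverage / depths.length;  // uncapped mean
        System.out.printf("mean=%.1f histogram=%s%n", mean, histogram);
    }
}

Map.merge collapses the read-increment-write sequence into a single call; either form preserves depths above the cap, which is exactly what the fold-80 computation needs.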
src/test/java/picard/analysis/directed/CollectHsMetricsTest.java (35 additions, 0 deletions)

@@ -217,6 +217,41 @@ public void testHsMetricsHandlesIndelsAppropriately() throws IOException {
         Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET, 1.0d); // inserted bases are counted as on target
     }
 
+    @Test
+    public void testHsMetricsF80DoesNotUseCovCap() throws IOException {
+        final SAMRecordSetBuilder highCoverage = new SAMRecordSetBuilder(true, SortOrder.coordinate);
+        final IntervalList targets = new IntervalList(highCoverage.getHeader());
+        final IntervalList baits = new IntervalList(highCoverage.getHeader());
+        targets.add(new Interval("chr1", 1000, 1199, false, "t1"));
+        baits.add(new Interval("chr1", 950, 1049, false, "b1"));
+        baits.add(new Interval("chr1", 1050, 1149, false, "b2"));
+        baits.add(new Interval("chr1", 1150, 1249, false, "b3"));
+
+        // Generate 100000 reads that fully cover the target
+        for (int i = 0; i < 100000; ++i) {
+            highCoverage.addFrag("r" + i, 0, 1000, false, false, "100M100M", null, 30);
+        }
+
+        // Write things out to file
+        final File dir = IOUtil.createTempDir("hsmetrics.test").toFile();
+        final File bs = new File(dir, "baits.interval_list").getAbsoluteFile();
+        final File ts = new File(dir, "targets.interval_list").getAbsoluteFile();
+        baits.write(bs);
+        targets.write(ts);
+        final File withHighCovBam = writeBam(highCoverage, new File(dir, "fold_80.bam"));
+
+        // Now run CollectHsMetrics
+        final File out = Files.createTempFile("hsmetrics_high_coverage.", ".txt").toFile();
+        runPicardCommandLine(Arrays.asList("COVERAGE_CAP=10", "SAMPLE_SIZE=0", "TI=" + ts.getPath(), "BI=" + bs.getPath(), "O=" + out.getPath(), "I=" + withHighCovBam.getAbsolutePath()));
+        final HsMetrics highCoverageMetrics = readMetrics(out);
+
+        IOUtil.deleteDirectoryTree(dir);
+        // actual coverage should not be impacted by COVERAGE_CAP
+        Assert.assertEquals(highCoverageMetrics.MEAN_TARGET_COVERAGE, 100000);
+        Assert.assertEquals(highCoverageMetrics.MEDIAN_TARGET_COVERAGE, 100000);
+        Assert.assertEquals(highCoverageMetrics.FOLD_80_BASE_PENALTY, 1);
+    }
+
 
     @Test
     public void testHsMetricsHighTargetCoverage() throws IOException {
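For context on the test's final assertion: every base of the 200 bp target is covered at exactly 100000x, so the mean and median are 100000 and the fold-80 penalty, the factor of extra sequencing needed to bring 80% of target bases up to the mean, is exactly 1. Here is a hedged sketch of that computation over a sparse depth histogram; it follows my reading of the metric's documented definition (mean depth divided by the depth at the 20th percentile of bases) rather than Picard's implementation, and all names are invented:

import java.util.Map;
import java.util.TreeMap;

// Sketch of fold-80 over a sparse depth histogram (depth -> number of bases).
// Assumes the definition: mean depth / depth at the 20th percentile of bases.
public class Fold80Sketch {
    static double fold80(final TreeMap<Integer, Long> histogram) {
        long bases = 0;
        long coverage = 0;
        for (final Map.Entry<Integer, Long> e : histogram.entrySet()) {
            bases += e.getValue();
            coverage += e.getKey() * e.getValue();
        }
        final double mean = (double) coverage / bases;
        // walk depths in ascending order until 20% of bases are accounted for
        long seen = 0;
        for (final Map.Entry<Integer, Long> e : histogram.entrySet()) {
            seen += e.getValue();
            if (seen >= 0.2 * bases) {
                return mean / e.getKey();
            }
        }
        throw new IllegalStateException("empty histogram");
    }

    public static void main(String[] args) {
        final TreeMap<Integer, Long> uniform = new TreeMap<>();
        uniform.put(100_000, 200L);           // 200 target bases, all at 100000x
        System.out.println(fold80(uniform));  // 1.0, matching the test's expectation
    }
}

With the old capped array, any depth above COVERAGE_CAP was recorded as the cap itself, which distorted the percentile walk and hence the fold score; the sparse map removes that distortion.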