From 8cc976cf70f38c1398a57bce0c32ceea74fd5b6a Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Sat, 2 Sep 2023 09:48:39 -0400 Subject: [PATCH 01/13] No coverage cap on Histogram ArrayList used for statistics on actual coverage metrics --- .../directed/TargetMetricsCollector.java | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index f31d6231c2..bc535c4bdd 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -67,6 +67,7 @@ import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; +import java.util.ArrayList; import java.util.Map; import java.util.Set; import java.util.stream.LongStream; @@ -126,6 +127,10 @@ public abstract class TargetMetricsCollector highQualityDepthHistogram = new Histogram<>("coverage_or_base_quality", "high_quality_coverage_count"); + // histogram of depths with no coverage cap imposed. + private final Histogram fullHighQualityDepthHistogram = new Histogram<>("coverage_or_base_quality", "high_quality_coverage_count"); + + // histogram of base qualities. includes all but quality 2 bases. we use this histogram to calculate theoretical het sensitivity. private final Histogram unfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count"); @@ -699,7 +704,9 @@ public void finish() { /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */ private void calculateTargetCoverageMetrics() { - final long[] highQualityCoverageHistogramArray = new long[coverageCap+1]; + // use ArrayList because the length of this is defined by the coverage depth. + final ArrayList fullHighQualityDepthHistogramList = new ArrayList<>(); + int zeroCoverageTargets = 0; // the number of bases we counted towards the depth histogram plus those that got thrown out by the coverage cap @@ -723,7 +730,7 @@ private void calculateTargetCoverageMetrics() { for (final Coverage c : this.highQualityCoverageByTarget.values()) { if (!c.hasCoverage()) { zeroCoverageTargets++; - highQualityCoverageHistogramArray[0] += c.interval.length(); + fullHighQualityDepthHistogramList.set(0, (long) c.interval.length()); targetBases[0] += c.interval.length(); minDepth = 0; continue; @@ -731,7 +738,9 @@ private void calculateTargetCoverageMetrics() { for (final int depth : c.getDepths()) { totalCoverage += depth; - highQualityCoverageHistogramArray[Math.min(depth, coverageCap)]++; + Long index = fullHighQualityDepthHistogramList.get(depth); + index = index + 1; + fullHighQualityDepthHistogramList.set(depth, index); maxDepth = Math.max(maxDepth, depth); minDepth = Math.min(minDepth, depth); @@ -747,20 +756,19 @@ private void calculateTargetCoverageMetrics() { throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases"); } - for (int i = 0; i < highQualityCoverageHistogramArray.length; ++i) { - highQualityDepthHistogram.increment(i, highQualityCoverageHistogramArray[i]); + for (int i = 0; i < fullHighQualityDepthHistogramList.size(); ++i) { + fullHighQualityDepthHistogram.increment(i, fullHighQualityDepthHistogramList.get(i)); } - // we do this instead of highQualityDepthHistogram.getMean() because the histogram imposes a coverage cap metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY; - metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian(); + metrics.MEDIAN_TARGET_COVERAGE = fullHighQualityDepthHistogram.getMedian(); metrics.MAX_TARGET_COVERAGE = maxDepth; // Use Math.min() to account for edge case where highQualityCoverageByTarget is empty (minDepth=Long.MAX_VALUE) metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, maxDepth); // compute the coverage value such that 80% of target bases have better coverage than it i.e. 20th percentile // this roughly measures how much we must sequence extra such that 80% of target bases have coverage at least as deep as the current mean coverage - metrics.FOLD_80_BASE_PENALTY = metrics.MEAN_TARGET_COVERAGE / highQualityDepthHistogram.getPercentile(0.2); + metrics.FOLD_80_BASE_PENALTY = metrics.MEAN_TARGET_COVERAGE / fullHighQualityDepthHistogram.getPercentile(0.2); metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) allTargets.getIntervals().size(); // Store the "how many bases at at-least X" calculations. From 9ccf182e71a3e4c953742552060cbc3b16129398 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Tue, 5 Sep 2023 18:03:17 -0400 Subject: [PATCH 02/13] Use hashmap for sparse array structure --- .../directed/TargetMetricsCollector.java | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index bc535c4bdd..3ca2e0ba29 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -67,7 +67,6 @@ import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; -import java.util.ArrayList; import java.util.Map; import java.util.Set; import java.util.stream.LongStream; @@ -127,10 +126,6 @@ public abstract class TargetMetricsCollector highQualityDepthHistogram = new Histogram<>("coverage_or_base_quality", "high_quality_coverage_count"); - // histogram of depths with no coverage cap imposed. - private final Histogram fullHighQualityDepthHistogram = new Histogram<>("coverage_or_base_quality", "high_quality_coverage_count"); - - // histogram of base qualities. includes all but quality 2 bases. we use this histogram to calculate theoretical het sensitivity. private final Histogram unfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count"); @@ -704,8 +699,8 @@ public void finish() { /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */ private void calculateTargetCoverageMetrics() { - // use ArrayList because the length of this is defined by the coverage depth. - final ArrayList fullHighQualityDepthHistogramList = new ArrayList<>(); + // use HashMap because the length of this is defined by the sparse coverage depth. + final HashMap fullHighQualityDepthHistogramMap = new HashMap<>(); int zeroCoverageTargets = 0; @@ -730,7 +725,7 @@ private void calculateTargetCoverageMetrics() { for (final Coverage c : this.highQualityCoverageByTarget.values()) { if (!c.hasCoverage()) { zeroCoverageTargets++; - fullHighQualityDepthHistogramList.set(0, (long) c.interval.length()); + fullHighQualityDepthHistogramMap.put(0, (long) c.interval.length()); targetBases[0] += c.interval.length(); minDepth = 0; continue; @@ -738,9 +733,9 @@ private void calculateTargetCoverageMetrics() { for (final int depth : c.getDepths()) { totalCoverage += depth; - Long index = fullHighQualityDepthHistogramList.get(depth); + Long index = fullHighQualityDepthHistogramMap.getOrDefault(depth, 0L); index = index + 1; - fullHighQualityDepthHistogramList.set(depth, index); + fullHighQualityDepthHistogramMap.put(depth, index); maxDepth = Math.max(maxDepth, depth); minDepth = Math.min(minDepth, depth); @@ -756,19 +751,19 @@ private void calculateTargetCoverageMetrics() { throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases"); } - for (int i = 0; i < fullHighQualityDepthHistogramList.size(); ++i) { - fullHighQualityDepthHistogram.increment(i, fullHighQualityDepthHistogramList.get(i)); + for (Map.Entry entry : fullHighQualityDepthHistogramMap.entrySet()) { + highQualityDepthHistogram.increment(entry.getKey(), entry.getValue()); } metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY; - metrics.MEDIAN_TARGET_COVERAGE = fullHighQualityDepthHistogram.getMedian(); + metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian(); metrics.MAX_TARGET_COVERAGE = maxDepth; // Use Math.min() to account for edge case where highQualityCoverageByTarget is empty (minDepth=Long.MAX_VALUE) metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, maxDepth); // compute the coverage value such that 80% of target bases have better coverage than it i.e. 20th percentile // this roughly measures how much we must sequence extra such that 80% of target bases have coverage at least as deep as the current mean coverage - metrics.FOLD_80_BASE_PENALTY = metrics.MEAN_TARGET_COVERAGE / fullHighQualityDepthHistogram.getPercentile(0.2); + metrics.FOLD_80_BASE_PENALTY = metrics.MEAN_TARGET_COVERAGE / highQualityDepthHistogram.getPercentile(0.2); metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) allTargets.getIntervals().size(); // Store the "how many bases at at-least X" calculations. From 6f9ebeb53b0404e516822c4c9919f0fb9ba46feb Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Tue, 5 Sep 2023 18:03:50 -0400 Subject: [PATCH 03/13] test coverage to ensure Fold80 & median coverage are not impacted by Coverage Cap - per documentation. --- .../directed/CollectHsMetricsTest.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java index 363eab1aa8..90ce6eb274 100644 --- a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java +++ b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java @@ -217,6 +217,41 @@ public void testHsMetricsHandlesIndelsAppropriately() throws IOException { Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET, 1.0d); // inserted bases are counted as on target } + @Test + public void testHsMetricsF80DoesNotUseCovCap() throws IOException { + final SAMRecordSetBuilder highCoverage = new SAMRecordSetBuilder(true, SortOrder.coordinate); + final IntervalList targets = new IntervalList(highCoverage.getHeader()); + final IntervalList baits = new IntervalList(highCoverage.getHeader()); + targets.add(new Interval("chr1", 1000, 1199, false, "t1")); + baits.add(new Interval("chr1", 950, 1049, false, "b1")); + baits.add(new Interval("chr1", 1050, 1149, false, "b2")); + baits.add(new Interval("chr1", 1150, 1249, false, "b3")); + + // Generate 100000 reads that fully cover the target in each BAM + for (int i=0; i<100000; ++i) { + highCoverage.addFrag( "r" + i, 0, 1000, false, false, "100M100M", null, 30); + } + + // Write things out to file + final File dir = IOUtil.createTempDir("hsmetrics.test").toFile(); + final File bs = new File(dir, "baits.interval_list").getAbsoluteFile(); + final File ts = new File(dir, "targets.interval_list").getAbsoluteFile(); + baits.write(bs); + targets.write(ts); + final File withHighCovBam = writeBam(highCoverage, new File(dir, "fold_80.bam")); + + // Now run CollectHsMetrics + final File out = Files.createTempFile("hsmetrics_high_coverage.", ".txt").toFile(); + runPicardCommandLine(Arrays.asList("COVERAGE_CAP=10", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withHighCovBam.getAbsolutePath())); + final HsMetrics highCoverageMetrics = readMetrics(out); + + IOUtil.deleteDirectoryTree(dir); + // actual coverage should not be impacted by coverage_cap + Assert.assertEquals(highCoverageMetrics.MEAN_TARGET_COVERAGE, 100000); + Assert.assertEquals(highCoverageMetrics.MEDIAN_TARGET_COVERAGE, 100000); + Assert.assertEquals(highCoverageMetrics.FOLD_80_BASE_PENALTY, 1); + } + @Test public void testHsMetricsHighTargetCoverage() throws IOException { From 73a956327deeb2186a420020c1038bd751488642 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Wed, 13 Sep 2023 17:05:12 -0400 Subject: [PATCH 04/13] Using histogram directly --- .../analysis/directed/TargetMetricsCollector.java | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 3ca2e0ba29..6d95ca3d57 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -699,8 +699,6 @@ public void finish() { /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */ private void calculateTargetCoverageMetrics() { - // use HashMap because the length of this is defined by the sparse coverage depth. - final HashMap fullHighQualityDepthHistogramMap = new HashMap<>(); int zeroCoverageTargets = 0; @@ -725,7 +723,7 @@ private void calculateTargetCoverageMetrics() { for (final Coverage c : this.highQualityCoverageByTarget.values()) { if (!c.hasCoverage()) { zeroCoverageTargets++; - fullHighQualityDepthHistogramMap.put(0, (long) c.interval.length()); + highQualityDepthHistogram.increment(0, c.interval.length()); targetBases[0] += c.interval.length(); minDepth = 0; continue; @@ -733,9 +731,7 @@ private void calculateTargetCoverageMetrics() { for (final int depth : c.getDepths()) { totalCoverage += depth; - Long index = fullHighQualityDepthHistogramMap.getOrDefault(depth, 0L); - index = index + 1; - fullHighQualityDepthHistogramMap.put(depth, index); + highQualityDepthHistogram.increment(depth, 1); maxDepth = Math.max(maxDepth, depth); minDepth = Math.min(minDepth, depth); @@ -751,10 +747,6 @@ private void calculateTargetCoverageMetrics() { throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases"); } - for (Map.Entry entry : fullHighQualityDepthHistogramMap.entrySet()) { - highQualityDepthHistogram.increment(entry.getKey(), entry.getValue()); - } - metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY; metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian(); metrics.MAX_TARGET_COVERAGE = maxDepth; From 0f95ef00b28250895845d4779f1c56959602e9f9 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Mon, 29 Jan 2024 19:01:49 -0800 Subject: [PATCH 05/13] Normalize Depth Array directly & test --- .../analysis/TheoreticalSensitivity.java | 17 ++++++++ .../analysis/TheoreticalSensitivityTest.java | 40 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/main/java/picard/analysis/TheoreticalSensitivity.java b/src/main/java/picard/analysis/TheoreticalSensitivity.java index 08d0b8e9c8..bdfd6227bb 100644 --- a/src/main/java/picard/analysis/TheoreticalSensitivity.java +++ b/src/main/java/picard/analysis/TheoreticalSensitivity.java @@ -34,6 +34,7 @@ import java.util.*; import java.util.stream.IntStream; +import java.util.stream.LongStream; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.math3.distribution.BinomialDistribution; @@ -198,6 +199,22 @@ public List> sampleCumulativeSums(final int maxNumberOfSumman } } + public static double[] normalizeDepthArray(final long[] depthArray) { + if (depthArray == null || depthArray.length == 0) { + throw new PicardException("Histogram is null and cannot be normalized"); + } + + long sumofValues = LongStream.of(depthArray).sum(); + final double[] normalizedHistogram = new double[depthArray.length]; + + for (int i = 0; i < depthArray.length; i++) { + normalizedHistogram[i] = (double) depthArray[i] / sumofValues; + } + + return normalizedHistogram; + + } + public static double[] normalizeHistogram(final Histogram histogram) { if (histogram == null) throw new PicardException("Histogram is null and cannot be normalized"); diff --git a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java index 95a80a460a..cc9fb16b7f 100644 --- a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java +++ b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java @@ -203,6 +203,46 @@ public Object[][] hetSensDataProvider() { }; } + @Test(dataProvider = "hetSensDataProvider") + public void testHetSensTargetedHS(final double expected, final File metricsFile) throws Exception { + final double tolerance = 0.000_000_01; + + final MetricsFile metrics = new MetricsFile<>(); + try (final FileReader metricsFileReader = new FileReader(metricsFile)) { + metrics.read(metricsFileReader); + } + + final List> histograms = metrics.getAllHistograms(); + final Histogram depthHistogram = histograms.get(0); + final Histogram qualityHistogram = histograms.get(1); + + //flip histograms into array to do testing. + final long[] depthHistArray = new long[depthHistogram.size()]; + for (int i = 0; i < depthHistogram.size(); i++) { + if (depthHistogram.get(i) != null) { + depthHistArray[i] = (long) depthHistogram.get(i).getValue(); + } + } + + final long[] qualHistArray = new long[qualityHistogram.size()]; + for (int i = 0; i < qualityHistogram.size(); i++) { + if (qualityHistogram.get(i) != null) { + qualHistArray[i] = (long) qualityHistogram.get(i).getValue(); + } + } + + + final double[] depthDistribution = TheoreticalSensitivity.normalizeDepthArray(depthHistArray); + final double[] qualityDistribution = TheoreticalSensitivity.normalizeDepthArray(qualHistArray); + + final int sampleSize = 1_000; + final double logOddsThreshold = 3.0; + + final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold); + Assert.assertEquals(result, expected, tolerance); + } + + @Test(dataProvider = "hetSensDataProvider") public void testHetSensTargeted(final double expected, final File metricsFile) throws Exception { final double tolerance = 0.000_000_01; From 299956a850268503279a3169398749cb07fc39f6 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Mon, 29 Jan 2024 19:05:14 -0800 Subject: [PATCH 06/13] create unfiltered, capped array create unfiltered, uncapped histograph for output close TODO of normalizing array directly, to avoid the extra flip between array & histograph. --- .../directed/TargetMetricsCollector.java | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 6d95ca3d57..984d58b914 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -778,36 +778,30 @@ private void calculateTargetCoverageMetrics() { metrics.PCT_TARGET_BASES_100000X = (double) targetBases[17] / (double) targetBases[0]; } + private void calculateTheoreticalHetSensitivity(){ - final long[] unfilteredDepthHistogramArray = new long[coverageCap + 1]; + final long[] unfilteredDepthArray = new long[coverageCap + 1]; // collect the unfiltered coverages (i.e. only quality 2 bases excluded) for all targets into a histogram array for (final Coverage c : this.unfilteredCoverageByTarget.values()) { if (!c.hasCoverage()) { - unfilteredDepthHistogramArray[0] += c.interval.length(); + unfilteredDepthArray[0] += c.interval.length(); + unfilteredDepthHistogram.increment(0, c.interval.length()); continue; } for (final int depth : c.getDepths()) { - unfilteredDepthHistogramArray[Math.min(depth, coverageCap)]++; + unfilteredDepthArray[Math.min(depth, coverageCap)]++; + unfilteredDepthHistogram.increment(depth, 1); } } - if (LongStream.of(baseQHistogramArray).sum() != LongStream.rangeClosed(0, coverageCap).map(i -> i * unfilteredDepthHistogramArray[(int)i]).sum()) { + if (LongStream.of(baseQHistogramArray).sum() != LongStream.rangeClosed(0, coverageCap).map(i -> i * unfilteredDepthArray[(int)i]).sum()) { throw new PicardException("numbers of bases in the base quality histogram and the coverage histogram are not equal"); } - // TODO: normalize the arrays directly. then we don't have to convert to Histograms - for (int i=0; i Date: Tue, 30 Jan 2024 20:13:57 -0800 Subject: [PATCH 07/13] remove tests for normalizeDepthArray --- .../analysis/TheoreticalSensitivityTest.java | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java index cc9fb16b7f..cd0463970c 100644 --- a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java +++ b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java @@ -203,45 +203,6 @@ public Object[][] hetSensDataProvider() { }; } - @Test(dataProvider = "hetSensDataProvider") - public void testHetSensTargetedHS(final double expected, final File metricsFile) throws Exception { - final double tolerance = 0.000_000_01; - - final MetricsFile metrics = new MetricsFile<>(); - try (final FileReader metricsFileReader = new FileReader(metricsFile)) { - metrics.read(metricsFileReader); - } - - final List> histograms = metrics.getAllHistograms(); - final Histogram depthHistogram = histograms.get(0); - final Histogram qualityHistogram = histograms.get(1); - - //flip histograms into array to do testing. - final long[] depthHistArray = new long[depthHistogram.size()]; - for (int i = 0; i < depthHistogram.size(); i++) { - if (depthHistogram.get(i) != null) { - depthHistArray[i] = (long) depthHistogram.get(i).getValue(); - } - } - - final long[] qualHistArray = new long[qualityHistogram.size()]; - for (int i = 0; i < qualityHistogram.size(); i++) { - if (qualityHistogram.get(i) != null) { - qualHistArray[i] = (long) qualityHistogram.get(i).getValue(); - } - } - - - final double[] depthDistribution = TheoreticalSensitivity.normalizeDepthArray(depthHistArray); - final double[] qualityDistribution = TheoreticalSensitivity.normalizeDepthArray(qualHistArray); - - final int sampleSize = 1_000; - final double logOddsThreshold = 3.0; - - final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold); - Assert.assertEquals(result, expected, tolerance); - } - @Test(dataProvider = "hetSensDataProvider") public void testHetSensTargeted(final double expected, final File metricsFile) throws Exception { From ad4ba42df3d9163d3c5551a6bbf458d745963d20 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Tue, 30 Jan 2024 20:14:11 -0800 Subject: [PATCH 08/13] remove normalizeDepthArray --- .../picard/analysis/TheoreticalSensitivity.java | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/main/java/picard/analysis/TheoreticalSensitivity.java b/src/main/java/picard/analysis/TheoreticalSensitivity.java index bdfd6227bb..6f1194ddd5 100644 --- a/src/main/java/picard/analysis/TheoreticalSensitivity.java +++ b/src/main/java/picard/analysis/TheoreticalSensitivity.java @@ -199,22 +199,6 @@ public List> sampleCumulativeSums(final int maxNumberOfSumman } } - public static double[] normalizeDepthArray(final long[] depthArray) { - if (depthArray == null || depthArray.length == 0) { - throw new PicardException("Histogram is null and cannot be normalized"); - } - - long sumofValues = LongStream.of(depthArray).sum(); - final double[] normalizedHistogram = new double[depthArray.length]; - - for (int i = 0; i < depthArray.length; i++) { - normalizedHistogram[i] = (double) depthArray[i] / sumofValues; - } - - return normalizedHistogram; - - } - public static double[] normalizeHistogram(final Histogram histogram) { if (histogram == null) throw new PicardException("Histogram is null and cannot be normalized"); From 894eefb55b3e83e2cad23824a312bd72c71e7453 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Tue, 30 Jan 2024 20:29:35 -0800 Subject: [PATCH 09/13] build uncapped data for outputting histograms keep capped data for theoretical stats move calculation of min / max depth into base loading ensure histograms are not sparsely populated --- .../directed/TargetMetricsCollector.java | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 984d58b914..e6677becc8 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -128,9 +128,11 @@ public abstract class TargetMetricsCollector unfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count"); + private final Histogram unCappedUnfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count"); // histogram of base qualities. includes all but quality 2 bases. we use this histogram to calculate theoretical het sensitivity. private final Histogram unfilteredBaseQHistogram = new Histogram<>("baseq", "unfiltered_baseq_count"); + private final Histogram unCappedUnfilteredBaseQHistogram = new Histogram<>("baseq", "unfiltered_baseq_count"); private static final double LOG_ODDS_THRESHOLD = 3.0; @@ -367,6 +369,8 @@ public class PerUnitTargetMetricCollector implements PerUnitMetricCollector highQualityCoverageByTarget; @@ -374,6 +378,9 @@ public class PerUnitTargetMetricCollector implements PerUnitMetricCollector unfilteredCoverageByTarget; + private long ufMaxDepth = 0; + private long hqMaxDepth = 0; + private final TargetMetrics metrics = new TargetMetrics(); private final int minimumBaseQuality; private final CountingAdapterFilter adapterFilter; @@ -631,11 +638,14 @@ public void acceptRecord(final SAMRecord record) { if (ufCoverage.getDepths()[targetOffset] <= coverageCap) { baseQHistogramArray[qual]++; } + unCappedBaseQHistogramArray[qual]++; + ufMaxDepth = Math.max(ufMaxDepth, ufCoverage.getDepths()[targetOffset]); // Then filtered if (highQual) { final Coverage hqCoverage = highQualityCoverageByTarget.get(target); hqCoverage.addBase(targetOffset); + hqMaxDepth = Math.max(hqMaxDepth, hqCoverage.getDepths()[targetOffset]); if (coveredTargets.add(target)) { hqCoverage.incrementReadCount(); @@ -700,15 +710,13 @@ public void finish() { /** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */ private void calculateTargetCoverageMetrics() { + LongStream.range(0, hqMaxDepth).forEach(i -> highQualityDepthHistogram.increment((int) i, 0)); + int zeroCoverageTargets = 0; // the number of bases we counted towards the depth histogram plus those that got thrown out by the coverage cap long totalCoverage = 0; - // the maximum depth at any target base - long maxDepth = 0; - - // the minimum depth at any target base long minDepth = Long.MAX_VALUE; // The "how many target bases at at-least X" calculations. @@ -732,7 +740,6 @@ private void calculateTargetCoverageMetrics() { for (final int depth : c.getDepths()) { totalCoverage += depth; highQualityDepthHistogram.increment(depth, 1); - maxDepth = Math.max(maxDepth, depth); minDepth = Math.min(minDepth, depth); // Add to the "how many target bases at at-least X" calculations. @@ -749,9 +756,9 @@ private void calculateTargetCoverageMetrics() { metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY; metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian(); - metrics.MAX_TARGET_COVERAGE = maxDepth; + metrics.MAX_TARGET_COVERAGE = hqMaxDepth; // Use Math.min() to account for edge case where highQualityCoverageByTarget is empty (minDepth=Long.MAX_VALUE) - metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, maxDepth); + metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, hqMaxDepth); // compute the coverage value such that 80% of target bases have better coverage than it i.e. 20th percentile // this roughly measures how much we must sequence extra such that 80% of target bases have coverage at least as deep as the current mean coverage @@ -780,28 +787,42 @@ private void calculateTargetCoverageMetrics() { private void calculateTheoreticalHetSensitivity(){ - final long[] unfilteredDepthArray = new long[coverageCap + 1]; + LongStream.range(0, coverageCap).forEach(i -> unfilteredDepthHistogram.increment((int) i, 0)); + LongStream.range(0, ufMaxDepth).forEach(i -> unCappedUnfilteredDepthHistogram.increment((int) i, 0)); // collect the unfiltered coverages (i.e. only quality 2 bases excluded) for all targets into a histogram array for (final Coverage c : this.unfilteredCoverageByTarget.values()) { if (!c.hasCoverage()) { - unfilteredDepthArray[0] += c.interval.length(); unfilteredDepthHistogram.increment(0, c.interval.length()); + unCappedUnfilteredDepthHistogram.increment(0, c.interval.length()); continue; } for (final int depth : c.getDepths()) { - unfilteredDepthArray[Math.min(depth, coverageCap)]++; - unfilteredDepthHistogram.increment(depth, 1); + unfilteredDepthHistogram.increment(Math.min(depth, coverageCap), 1); + unCappedUnfilteredDepthHistogram.increment(depth, 1); } } - if (LongStream.of(baseQHistogramArray).sum() != LongStream.rangeClosed(0, coverageCap).map(i -> i * unfilteredDepthArray[(int)i]).sum()) { + if (LongStream.of(baseQHistogramArray).sum() != unfilteredDepthHistogram.getSum()) { throw new PicardException("numbers of bases in the base quality histogram and the coverage histogram are not equal"); } - final double [] depthDoubleArray = TheoreticalSensitivity.normalizeDepthArray(unfilteredDepthArray); - final double [] baseQDoubleArray = TheoreticalSensitivity.normalizeDepthArray(baseQHistogramArray); + if (LongStream.of(unCappedBaseQHistogramArray).sum() != unCappedUnfilteredDepthHistogram.getSum()) { + throw new PicardException("numbers of bases in the uncapped base quality histogram and the uncapped coverage histogram are not equal"); + } + + //TODO this is used elsewhere, so can't remove from here just yet - consider moving to accessor. + for (int i=0; i hsMetricsComparableMetricsFile) { hsMetricsComparableMetricsFile.addMetric(convertMetric(this.metrics)); hsMetricsComparableMetricsFile.addHistogram(highQualityDepthHistogram); - hsMetricsComparableMetricsFile.addHistogram(unfilteredBaseQHistogram); + hsMetricsComparableMetricsFile.addHistogram(unCappedUnfilteredBaseQHistogram); } } From 5a9c4b7c238b0cb3d3b98f4518d635fbc669a2e7 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Tue, 30 Jan 2024 20:35:33 -0800 Subject: [PATCH 10/13] remove longstream import --- src/main/java/picard/analysis/TheoreticalSensitivity.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/picard/analysis/TheoreticalSensitivity.java b/src/main/java/picard/analysis/TheoreticalSensitivity.java index 6f1194ddd5..08d0b8e9c8 100644 --- a/src/main/java/picard/analysis/TheoreticalSensitivity.java +++ b/src/main/java/picard/analysis/TheoreticalSensitivity.java @@ -34,7 +34,6 @@ import java.util.*; import java.util.stream.IntStream; -import java.util.stream.LongStream; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.math3.distribution.BinomialDistribution; From f1b6a6554139f22a933295b68f91183ea164a0b9 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Tue, 30 Jan 2024 20:36:04 -0800 Subject: [PATCH 11/13] clean up extra whitespace --- src/test/java/picard/analysis/TheoreticalSensitivityTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java index cd0463970c..95a80a460a 100644 --- a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java +++ b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java @@ -203,7 +203,6 @@ public Object[][] hetSensDataProvider() { }; } - @Test(dataProvider = "hetSensDataProvider") public void testHetSensTargeted(final double expected, final File metricsFile) throws Exception { final double tolerance = 0.000_000_01; From 817dddee442c8a2d3aa49d755ec50449e2562fa6 Mon Sep 17 00:00:00 2001 From: Joe Vieira Date: Tue, 30 Jan 2024 20:58:12 -0800 Subject: [PATCH 12/13] remove uncappedunfiltered, it's not needed --- .../picard/analysis/directed/TargetMetricsCollector.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index e6677becc8..3ff96e2acd 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -128,7 +128,6 @@ public abstract class TargetMetricsCollector unfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count"); - private final Histogram unCappedUnfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count"); // histogram of base qualities. includes all but quality 2 bases. we use this histogram to calculate theoretical het sensitivity. private final Histogram unfilteredBaseQHistogram = new Histogram<>("baseq", "unfiltered_baseq_count"); @@ -788,19 +787,16 @@ private void calculateTargetCoverageMetrics() { private void calculateTheoreticalHetSensitivity(){ LongStream.range(0, coverageCap).forEach(i -> unfilteredDepthHistogram.increment((int) i, 0)); - LongStream.range(0, ufMaxDepth).forEach(i -> unCappedUnfilteredDepthHistogram.increment((int) i, 0)); // collect the unfiltered coverages (i.e. only quality 2 bases excluded) for all targets into a histogram array for (final Coverage c : this.unfilteredCoverageByTarget.values()) { if (!c.hasCoverage()) { unfilteredDepthHistogram.increment(0, c.interval.length()); - unCappedUnfilteredDepthHistogram.increment(0, c.interval.length()); continue; } for (final int depth : c.getDepths()) { unfilteredDepthHistogram.increment(Math.min(depth, coverageCap), 1); - unCappedUnfilteredDepthHistogram.increment(depth, 1); } } @@ -808,10 +804,6 @@ private void calculateTheoreticalHetSensitivity(){ throw new PicardException("numbers of bases in the base quality histogram and the coverage histogram are not equal"); } - if (LongStream.of(unCappedBaseQHistogramArray).sum() != unCappedUnfilteredDepthHistogram.getSum()) { - throw new PicardException("numbers of bases in the uncapped base quality histogram and the uncapped coverage histogram are not equal"); - } - //TODO this is used elsewhere, so can't remove from here just yet - consider moving to accessor. for (int i=0; i Date: Wed, 31 Jan 2024 11:01:12 -0800 Subject: [PATCH 13/13] clean up unneeded properties --- .../java/picard/analysis/directed/TargetMetricsCollector.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 3ff96e2acd..39341fad0c 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -377,7 +377,6 @@ public class PerUnitTargetMetricCollector implements PerUnitMetricCollector unfilteredCoverageByTarget; - private long ufMaxDepth = 0; private long hqMaxDepth = 0; private final TargetMetrics metrics = new TargetMetrics(); @@ -638,7 +637,6 @@ public void acceptRecord(final SAMRecord record) { baseQHistogramArray[qual]++; } unCappedBaseQHistogramArray[qual]++; - ufMaxDepth = Math.max(ufMaxDepth, ufCoverage.getDepths()[targetOffset]); // Then filtered if (highQual) {