From 7b573c3124b589fe520e9d424315a0e2ed4b764a Mon Sep 17 00:00:00 2001 From: tfenne Date: Wed, 23 Jan 2019 15:20:29 -0700 Subject: [PATCH 1/6] Added handling for indels within CollectHsMetrics and CollectTargetedPcrMetrics. When the INCLUDE_INDELS parameter is set to true: 1. Deletions within reads are treated as covering the reference bases if the preceding base is high quality 2. Inserted bases are counted as "on-target" but don't contribute coverage to any target base(s) --- .../analysis/directed/CollectHsMetrics.java | 2 +- .../directed/CollectTargetedMetrics.java | 3 + .../directed/CollectTargetedPcrMetrics.java | 2 +- .../analysis/directed/HsMetricCollector.java | 3 +- .../directed/TargetMetricsCollector.java | 118 ++++++++++-------- .../directed/TargetedPcrMetricsCollector.java | 3 +- 6 files changed, 73 insertions(+), 58 deletions(-) diff --git a/src/main/java/picard/analysis/directed/CollectHsMetrics.java b/src/main/java/picard/analysis/directed/CollectHsMetrics.java index cef834b633..4180fc4d83 100644 --- a/src/main/java/picard/analysis/directed/CollectHsMetrics.java +++ b/src/main/java/picard/analysis/directed/CollectHsMetrics.java @@ -176,6 +176,6 @@ protected HsMetricCollector makeCollector(final Set acc final String probeSetName, final int nearProbeDistance) { return new HsMetricCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, - MINIMUM_MAPPING_QUALITY, MINIMUM_BASE_QUALITY, CLIP_OVERLAPPING_READS, true, COVERAGE_CAP, SAMPLE_SIZE); + MINIMUM_MAPPING_QUALITY, MINIMUM_BASE_QUALITY, CLIP_OVERLAPPING_READS, true, INCLUDE_INDELS, COVERAGE_CAP, SAMPLE_SIZE); } } diff --git a/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java b/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java index b3430b3a9d..d8dd418e40 100644 --- a/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java +++ b/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java @@ -93,6 +93,9 @@ protected abstract COLLECTOR makeCollector(final Set ac @Argument(doc = "True if we are to clip overlapping reads, false otherwise.", optional=true) public boolean CLIP_OVERLAPPING_READS = false; + @Argument(doc= "If true count inserted bases as on target and deleted bases as covered by a read.") + public boolean INCLUDE_INDELS = false; + @Argument(shortName = "covMax", doc = "Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is 200.", optional = true) public int COVERAGE_CAP = 200; diff --git a/src/main/java/picard/analysis/directed/CollectTargetedPcrMetrics.java b/src/main/java/picard/analysis/directed/CollectTargetedPcrMetrics.java index 3051d33e0c..7325f372ee 100644 --- a/src/main/java/picard/analysis/directed/CollectTargetedPcrMetrics.java +++ b/src/main/java/picard/analysis/directed/CollectTargetedPcrMetrics.java @@ -115,6 +115,6 @@ protected TargetedPcrMetricsCollector makeCollector(final Set accumulationLevels, final int minimumBaseQuality, final boolean clipOverlappingReads, final boolean noSideEffects, + final boolean includeIndels, final int coverageCap, final int sampleSize) { - super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, noSideEffects, coverageCap, sampleSize); + super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, noSideEffects, includeIndels, coverageCap, sampleSize); } @Override diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 4a8b3f3350..162fbde3c5 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -24,11 +24,7 @@ package picard.analysis.directed; -import htsjdk.samtools.AlignmentBlock; -import htsjdk.samtools.SAMReadGroupRecord; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SAMUtils; +import htsjdk.samtools.*; import htsjdk.samtools.metrics.MetricBase; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; @@ -130,6 +126,7 @@ public abstract class TargetMetricsCollector accumulationLev final boolean clipOverlappingReads, final int coverageCap, final int sampleSize) { - this(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, false, coverageCap, sampleSize); + this(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, false, false, coverageCap, sampleSize); } public TargetMetricsCollector(final Set accumulationLevels, @@ -270,6 +267,7 @@ public TargetMetricsCollector(final Set accumulationLev final int minimumBaseQuality, final boolean clipOverlappingReads, final boolean noSideEffects, + final boolean includeIndels, final int coverageCap, final int sampleSize) { this.perTargetCoverage = perTargetCoverage; @@ -321,6 +319,7 @@ public TargetMetricsCollector(final Set accumulationLev this.minimumBaseQuality = minimumBaseQuality; this.clipOverlappingReads = clipOverlappingReads; this.noSideEffects = noSideEffects; + this.includeIndels = includeIndels; setup(accumulationLevels, samRgRecords); } @@ -329,7 +328,7 @@ public TargetMetricsCollector(final Set accumulationLev protected PerUnitMetricCollector makeChildCollector(final String sample, final String library, final String readGroup) { final PerUnitTargetMetricCollector collector = new PerUnitTargetMetricCollector(probeSetName, coverageByTargetForRead.keySet(), sample, library, readGroup, probeTerritory, targetTerritory, genomeSize, - intervalToGc, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads); + intervalToGc, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, includeIndels); if (this.probeSetName != null) { collector.setBaitSetName(probeSetName); } @@ -366,6 +365,7 @@ public class PerUnitTargetMetricCollector implements PerUnitMetricCollector intervalToGc, final int minimumMappingQuality, final int minimumBaseQuality, - final boolean clipOverlappingReads) { + final boolean clipOverlappingReads, + final boolean includeIndels) { this.metrics.SAMPLE = sample; this.metrics.LIBRARY = library; this.metrics.READ_GROUP = readGroup; @@ -399,6 +400,7 @@ public PerUnitTargetMetricCollector(final String probeSetName, final Set coveredTargets = new HashSet<>(); - for (final AlignmentBlock block : rec.getAlignmentBlocks()) { - final int length = block.getLength(); - final int refStart = block.getReferenceStart(); - final int readStart = block.getReadStart(); - - for (int offset = 0; offset < length; ++offset) { - final int refPos = refStart + offset; - final int readPos = readStart + offset; - final int qual = baseQualities[readPos - 1]; - - if (qual <= 2) { - metrics.PCT_EXC_BASEQ++; - continue; - } + final Set coveredTargets = new HashSet<>(); // Each target is added to this the first time the read covers it + int readOffset = 0; + int refOffset = rec.getAlignmentStart() -1; + + for (final CigarElement cig : rec.getCigar()) { + final CigarOperator op = cig.getOperator(); + + for (int i=0; i= this.minimumBaseQuality; + final boolean onTarget = targets.stream().anyMatch(t -> t.getStart() <= refPos && t.getEnd() >= refPos); + final boolean incrementPerTargetCoverage = op != CigarOperator.INSERTION; // Inserted bases don't have a target position + + if (op.isAlignment() || (this.includeIndels && op.isIndel())) { + // Firstly handle all the summary metrics + if (!highQual) { + metrics.PCT_EXC_BASEQ++; + } else if (!onTarget) { + metrics.PCT_EXC_OFF_TARGET++; + } else { + metrics.ON_TARGET_BASES++; + if (mappedInPair) metrics.ON_TARGET_FROM_PAIR_BASES++; + } - boolean isOnTarget = false; - for (final Interval target : targets) { - if (refPos >= target.getStart() && refPos <= target.getEnd()) { - final int targetOffset = refPos - target.getStart(); - - // if the base quality exceeds the minimum threshold, then we update various metrics - if (qual >= minimumBaseQuality) { - ++metrics.ON_TARGET_BASES; - if (mappedInPair) ++metrics.ON_TARGET_FROM_PAIR_BASES; - final Coverage highQualityCoverage = highQualityCoverageByTarget.get(target); - highQualityCoverage.addBase(targetOffset); - if (!coveredTargets.contains(target)) { - highQualityCoverage.incrementReadCount(); - coveredTargets.add(target); - isOnTarget = true; + // Then go through the per-target/per-base hq and unfiltered coverage + // The cutoff of >= 2 is because even the unfilteredCoverage doesn't want Q1 or Q0 bases! + if (qual > 2 && incrementPerTargetCoverage) { + for (final Interval target : targets) { + if (refPos >= target.getStart() && refPos <= target.getEnd()) { + final int targetOffset = refPos - target.getStart(); + + // Unfiltered first (for theoretical het sensitivity) + final Coverage ufCoverage = unfilteredCoverageByTarget.get(target); + ufCoverage.addBase(targetOffset); + if (ufCoverage.getDepths()[targetOffset] <= coverageCap) baseQHistogramArray[qual]++; + + // Then filtered + if (highQual) { + final Coverage hqCoverage = highQualityCoverageByTarget.get(target); + hqCoverage.addBase(targetOffset); + + if (coveredTargets.add(target)) { + hqCoverage.incrementReadCount(); + } + } } - - } else { - // the base quality is in the range (2, minimumBaseQuality). we exclude them from the high-quality coverage histogram - this.metrics.PCT_EXC_BASEQ++; - } - - // even when the base quality is below minimumBaseQuality (but higher than 2), update the base quality and unfiltered coverage histogram for theoretical het sensitivity - // we don't bother with the read count for unfiltered coverage histogram because we don't use it - unfilteredCoverageByTarget.get(target).addBase(targetOffset); - - // we do not want to increment the base quality histogram for bases that will eventually get thrown out by the coverage cap - if (unfilteredCoverageByTarget.get(target).getDepths()[targetOffset] <= coverageCap){ - baseQHistogramArray[qual]++; } } } - // a base must not be on target and its base quality must exceed minimumBaseQuality for us to increment PCT_EXC_OFF_TARGET - if (!isOnTarget) this.metrics.PCT_EXC_OFF_TARGET++; - + // Finally update the offsets! + if (op.consumesReadBases()) readOffset += 1; + if (op.consumesReferenceBases()) refOffset += 1; } } } diff --git a/src/main/java/picard/analysis/directed/TargetedPcrMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetedPcrMetricsCollector.java index 6002098196..687781c242 100644 --- a/src/main/java/picard/analysis/directed/TargetedPcrMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetedPcrMetricsCollector.java @@ -73,9 +73,10 @@ public TargetedPcrMetricsCollector(final Set accumulati final int minimumBaseQuality, final boolean clipOverlappingReads, final boolean noSideEffects, + final boolean includeIndels, final int coverageCap, final int sampleSize) { - super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, noSideEffects, coverageCap, sampleSize); + super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, noSideEffects, includeIndels, coverageCap, sampleSize); } @Override public TargetedPcrMetrics convertMetric(final TargetMetrics targetMetrics) { From 57bc875826315b7b35a6fd1c62b99fb65a0d84ef Mon Sep 17 00:00:00 2001 From: tfenne Date: Wed, 23 Jan 2019 15:54:44 -0700 Subject: [PATCH 2/6] Fix performance regression. --- .../directed/TargetMetricsCollector.java | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 162fbde3c5..529b5bf8c6 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -556,22 +556,22 @@ public void acceptRecord(final SAMRecord record) { // 3. Unfiltered coverage information for het sensitivity // 4. The count of bases rejected for being low baseq or off-target // 5. The count of on-target bases and on-target bases from paired reads - // Find the target overlaps final Set coveredTargets = new HashSet<>(); // Each target is added to this the first time the read covers it int readOffset = 0; int refOffset = rec.getAlignmentStart() -1; for (final CigarElement cig : rec.getCigar()) { final CigarOperator op = cig.getOperator(); + final int len = cig.getLength(); - for (int i=0; i= this.minimumBaseQuality; - final boolean onTarget = targets.stream().anyMatch(t -> t.getStart() <= refPos && t.getEnd() >= refPos); - final boolean incrementPerTargetCoverage = op != CigarOperator.INSERTION; // Inserted bases don't have a target position - + for (int i=0; i= this.minimumBaseQuality; + final boolean onTarget = overlapsAny(refPos, targets); + final boolean incrementPerTargetCoverage = op != CigarOperator.INSERTION; // Inserted bases don't have a target position + // Firstly handle all the summary metrics if (!highQual) { metrics.PCT_EXC_BASEQ++; @@ -584,7 +584,7 @@ public void acceptRecord(final SAMRecord record) { // Then go through the per-target/per-base hq and unfiltered coverage // The cutoff of >= 2 is because even the unfilteredCoverage doesn't want Q1 or Q0 bases! - if (qual > 2 && incrementPerTargetCoverage) { + if (qual > 2 && incrementPerTargetCoverage && onTarget) { for (final Interval target : targets) { if (refPos >= target.getStart() && refPos <= target.getEnd()) { final int targetOffset = refPos - target.getStart(); @@ -615,6 +615,14 @@ public void acceptRecord(final SAMRecord record) { } } + /* Returns true if the `pos` is between the `start` and `end` of at least one interval. */ + private boolean overlapsAny(final int pos, final Collection intervals) { + for (final Interval interval : intervals) { + if (pos >= interval.getStart() && pos <= interval.getEnd()) return true; + } + return false; + } + @Override public void finish() { metrics.PCT_PF_READS = metrics.PF_READS / (double) metrics.TOTAL_READS; From 7fd7d8f631f70b2c6dcfa14944812bf7e78409f0 Mon Sep 17 00:00:00 2001 From: tfenne Date: Thu, 24 Jan 2019 08:24:22 -0700 Subject: [PATCH 3/6] Added test for indel handling in hs metrics. --- .../directed/CollectHsMetricsTest.java | 93 ++++++++++++++++--- 1 file changed, 79 insertions(+), 14 deletions(-) diff --git a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java index 2ab85a1c9f..9eda3826cb 100644 --- a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java +++ b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java @@ -1,7 +1,11 @@ package picard.analysis.directed; +import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.metrics.MetricsFile; -import htsjdk.samtools.util.Histogram; +import htsjdk.samtools.util.*; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -10,6 +14,8 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; +import java.nio.file.Files; +import java.util.Arrays; public class CollectHsMetricsTest extends CommandLineProgramTest { private final static File TEST_DIR = new File("testdata/picard/analysis/directed/CollectHsMetrics"); @@ -41,6 +47,26 @@ public Object[][] targetedIntervalDataProvider() { }; } + /** Read back the first metrics record in an hs metrics file. */ + HsMetrics readMetrics(final File f) { + try { + final MetricsFile> mFile = new MetricsFile>(); + mFile.read(new FileReader(f)); + return mFile.getMetrics().get(0); + } + catch (IOException ioe) { + throw new RuntimeIOException(ioe); + } + } + + /** Writes the contents of a SAMRecordSetBuilder out to a file. */ + File writeBam(final SAMRecordSetBuilder builder, final File f) { + final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(builder.getHeader(), false, f); + builder.forEach(out::addAlignment); + out.close(); + return f; + } + @Test(dataProvider = "collectHsMetricsDataProvider") public void runCollectHsMetricsTest(final String input, final String targetIntervals, @@ -72,19 +98,14 @@ public void runCollectHsMetricsTest(final String input, Assert.assertEquals(runPicardCommandLine(args), 0); - final MetricsFile> output = new MetricsFile>(); - output.read(new FileReader(outfile)); - - for (final HsMetrics metrics : output.getMetrics()) { - // overlap - Assert.assertEquals(metrics.TOTAL_READS, totalReads); - Assert.assertEquals(metrics.PF_UQ_BASES_ALIGNED, pfUqBasesAligned); - Assert.assertEquals(metrics.PCT_EXC_BASEQ, pctExcBaseq); - Assert.assertEquals(metrics.PCT_EXC_OVERLAP, pctExcOverlap); - Assert.assertEquals(metrics.PCT_TARGET_BASES_1X, pctTargetBases1x); - Assert.assertEquals(metrics.PCT_TARGET_BASES_2X, pctTargetBases2x); - Assert.assertEquals(metrics.MAX_TARGET_COVERAGE, maxTargetCoverage); - } + final HsMetrics metrics = readMetrics(outfile); + Assert.assertEquals(metrics.TOTAL_READS, totalReads); + Assert.assertEquals(metrics.PF_UQ_BASES_ALIGNED, pfUqBasesAligned); + Assert.assertEquals(metrics.PCT_EXC_BASEQ, pctExcBaseq); + Assert.assertEquals(metrics.PCT_EXC_OVERLAP, pctExcOverlap); + Assert.assertEquals(metrics.PCT_TARGET_BASES_1X, pctTargetBases1x); + Assert.assertEquals(metrics.PCT_TARGET_BASES_2X, pctTargetBases2x); + Assert.assertEquals(metrics.MAX_TARGET_COVERAGE, maxTargetCoverage); } @Test @@ -128,4 +149,48 @@ public void testCoverageHistogram() throws IOException { Assert.assertEquals(coverageHistogram.get(0).getValue(), 10.0); Assert.assertEquals(coverageHistogram.get(1).getValue(), 10.0); } + + @Test + public void testHsMetricsHandlesIndelsAppropriately() throws IOException { + final SAMRecordSetBuilder withDeletions = new SAMRecordSetBuilder(true, SortOrder.coordinate); + final SAMRecordSetBuilder withInsertions = new SAMRecordSetBuilder(true, SortOrder.coordinate); + final IntervalList targets = new IntervalList(withDeletions.getHeader()); + final IntervalList baits = new IntervalList(withDeletions.getHeader()); + targets.add(new Interval("chr1", 1000, 1199, false, "t1")); + baits.add(new Interval("chr1", 950, 1049, false, "b1")); + baits.add(new Interval("chr1", 1050, 1149, false, "b2")); + baits.add(new Interval("chr1", 1150, 1249, false, "b3")); + + // Generate 100 reads that fully cover the the target in each BAM + for (int i=0; i<100; ++i) { + withDeletions.addFrag( "d" + i, 0, 1000, false, false, "100M20D80M", null, 30); + withInsertions.addFrag("i" + i, 0, 1000, false, false, "100M50I100M", null, 30); + } + + // Write things out to file + final File dir = IOUtil.createTempDir("hsmetrics.", ".test"); + final File bs = new File(dir, "baits.interval_list").getAbsoluteFile(); + final File ts = new File(dir, "targets.interval_list").getAbsoluteFile(); + baits.write(bs); + targets.write(ts); + final File withDelBam = writeBam(withDeletions, new File(dir, "with_del.bam")); + final File withInsBam = writeBam(withInsertions, new File(dir, "with_ins.bam")); + + // Now run CollectHsMetrics four times + final File out = Files.createTempFile("hsmetrics.", ".txt").toFile(); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withDelBam.getAbsolutePath())); + final HsMetrics delsWithoutIndelHandling = readMetrics(out); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withDelBam.getAbsolutePath())); + final HsMetrics delsWithIndelHandling = readMetrics(out); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withInsBam.getAbsolutePath())); + final HsMetrics insWithoutIndelHandling = readMetrics(out); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withInsBam.getAbsolutePath())); + final HsMetrics insWithIndelHandling = readMetrics(out); + + Assert.assertEquals(delsWithoutIndelHandling.MEAN_TARGET_COVERAGE, 90.0); // 100X over 180/200 bases due to deletion + Assert.assertEquals(delsWithIndelHandling.MEAN_TARGET_COVERAGE, 100.0); // 100X with counting the deletion + + Assert.assertEquals(insWithoutIndelHandling.PCT_USABLE_BASES_ON_TARGET, 200/250d); // 50/250 inserted bases are not counted as on target + Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET, 1.0d); // inserted bases are counted as on target + } } From f8d0b66ba6f6a90393c76bc033bf603f968c2580 Mon Sep 17 00:00:00 2001 From: tfenne Date: Fri, 25 Jan 2019 09:29:55 -0700 Subject: [PATCH 4/6] Fixed failing test. FWIW the barclayTest doesn't like it if you use runPicardCommandLine("I=", input) vs. runPicardCommandLine("I=" + input). --- .../picard/analysis/directed/CollectHsMetricsTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java index 9eda3826cb..bb2251e0d1 100644 --- a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java +++ b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java @@ -178,13 +178,13 @@ public void testHsMetricsHandlesIndelsAppropriately() throws IOException { // Now run CollectHsMetrics four times final File out = Files.createTempFile("hsmetrics.", ".txt").toFile(); - runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withDelBam.getAbsolutePath())); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath())); final HsMetrics delsWithoutIndelHandling = readMetrics(out); - runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withDelBam.getAbsolutePath())); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath())); final HsMetrics delsWithIndelHandling = readMetrics(out); - runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withInsBam.getAbsolutePath())); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath())); final HsMetrics insWithoutIndelHandling = readMetrics(out); - runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI=", ts.getPath(), "BI=", bs.getPath(), "O=", out.getPath(), "I=", withInsBam.getAbsolutePath())); + runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath())); final HsMetrics insWithIndelHandling = readMetrics(out); Assert.assertEquals(delsWithoutIndelHandling.MEAN_TARGET_COVERAGE, 90.0); // 100X over 180/200 bases due to deletion From 767de3627e472eacba110680b2d99d67c5c65010 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 31 Jan 2019 13:12:40 -0700 Subject: [PATCH 5/6] Apply suggestions from code review Co-Authored-By: tfenne --- .../java/picard/analysis/directed/TargetMetricsCollector.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 529b5bf8c6..1d3e5f64ba 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -555,10 +555,10 @@ public void acceptRecord(final SAMRecord record) { // 2. The number of reads contributing to per-base coverage per target // 3. Unfiltered coverage information for het sensitivity // 4. The count of bases rejected for being low baseq or off-target - // 5. The count of on-target bases and on-target bases from paired reads + // 5. The count of overall on-target bases, and on-target bases from paired reads final Set coveredTargets = new HashSet<>(); // Each target is added to this the first time the read covers it int readOffset = 0; - int refOffset = rec.getAlignmentStart() -1; + int refOffset = rec.getAlignmentStart() - 1; for (final CigarElement cig : rec.getCigar()) { final CigarOperator op = cig.getOperator(); From 283341e592a4cb3d0fbde0422289bbf97481a6f7 Mon Sep 17 00:00:00 2001 From: tfenne Date: Thu, 31 Jan 2019 13:13:39 -0700 Subject: [PATCH 6/6] Changes to address yfarjoun's review comments. --- .../analysis/directed/TargetMetricsCollector.java | 13 +++++++++---- .../analysis/directed/CollectHsMetricsTest.java | 8 +++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 1d3e5f64ba..d58c10716a 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -430,7 +430,7 @@ public Map getCoverageByTarget() { /** Adds information about an individual SAMRecord to the statistics. */ public void acceptRecord(final SAMRecord record) { // Just ignore secondary alignments altogether - if (record.getNotPrimaryAlignmentFlag()) return; + if (record.isSecondaryAlignment()) return; // Cache some things, and compute the total number of bases aligned in the record. final boolean mappedInPair = record.getReadPairedFlag() && !record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && !record.getSupplementaryAlignmentFlag(); @@ -583,10 +583,10 @@ public void acceptRecord(final SAMRecord record) { } // Then go through the per-target/per-base hq and unfiltered coverage - // The cutoff of >= 2 is because even the unfilteredCoverage doesn't want Q1 or Q0 bases! + // The cutoff of > 2 is because even the unfilteredCoverage doesn't want those bases if (qual > 2 && incrementPerTargetCoverage && onTarget) { for (final Interval target : targets) { - if (refPos >= target.getStart() && refPos <= target.getEnd()) { + if (overlapsInterval(refPos, target)) { final int targetOffset = refPos - target.getStart(); // Unfiltered first (for theoretical het sensitivity) @@ -618,11 +618,16 @@ public void acceptRecord(final SAMRecord record) { /* Returns true if the `pos` is between the `start` and `end` of at least one interval. */ private boolean overlapsAny(final int pos, final Collection intervals) { for (final Interval interval : intervals) { - if (pos >= interval.getStart() && pos <= interval.getEnd()) return true; + if (overlapsInterval(pos, interval)) return true; } return false; } + /** Returns true if the position is within the start-end range inclusive of the given interval. */ + private boolean overlapsInterval(final int pos, final Interval interval) { + return pos >= interval.getStart() && pos <= interval.getEnd(); + } + @Override public void finish() { metrics.PCT_PF_READS = metrics.PF_READS / (double) metrics.TOTAL_READS; diff --git a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java index bb2251e0d1..d3403fb2ef 100644 --- a/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java +++ b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java @@ -61,9 +61,9 @@ HsMetrics readMetrics(final File f) { /** Writes the contents of a SAMRecordSetBuilder out to a file. */ File writeBam(final SAMRecordSetBuilder builder, final File f) { - final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(builder.getHeader(), false, f); - builder.forEach(out::addAlignment); - out.close(); + try (final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(builder.getHeader(), false, f)) { + builder.forEach(out::addAlignment); + } return f; } @@ -187,6 +187,8 @@ public void testHsMetricsHandlesIndelsAppropriately() throws IOException { runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath())); final HsMetrics insWithIndelHandling = readMetrics(out); + IOUtil.deleteDirectoryTree(dir); + Assert.assertEquals(delsWithoutIndelHandling.MEAN_TARGET_COVERAGE, 90.0); // 100X over 180/200 bases due to deletion Assert.assertEquals(delsWithIndelHandling.MEAN_TARGET_COVERAGE, 100.0); // 100X with counting the deletion