Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add flag to keep zero length intervals when converting bed -> interval_list #1928

Merged
merged 2 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions src/main/java/picard/util/BedToIntervalList.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,13 @@ public class BedToIntervalList extends CommandLineProgram {
@Argument(doc = "If true, entries that are on contig-names that are missing from the provided dictionary will be dropped.")
public boolean DROP_MISSING_CONTIGS = false;

@Argument(doc = "If true, write length zero intervals in input bed file to resulting interval list file.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we say "zero-length intervals" instead of "length zero intervals"? I know it's not critical; it's just for the sake of consistency.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually like "length zero" better than "zero length" so I switched the labelings everywhere to make it consistent.

public boolean KEEP_ZERO_LENGTH_INTERVALS = false;

private final Log LOG = Log.getInstance(getClass());
private int missingIntervals = 0;
private int missingRegion = 0;
private int zeroLengthIntervals = 0;

@Override
protected int doWork() {
Expand Down Expand Up @@ -168,8 +172,18 @@ protected int doWork() {
}

final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
intervalList.add(interval);

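// A zero-length bed entry has start == end; because the bed start is shifted by 1 when converting to
// 1-based coordinates, such an entry shows up here as start == end+1.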
if ((start == end+1) && !KEEP_ZERO_LENGTH_INTERVALS) {
LOG.info(String.format("Skipping writing zero length interval at %s:%d-%d.", sequenceName, start, end));
} else {
final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
intervalList.add(interval);
}

if (start == end+1) {
zeroLengthIntervals++;
}

progressLogger.record(sequenceName, start);
}
Expand All @@ -182,6 +196,19 @@ protected int doWork() {
LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
}
}

if (!KEEP_ZERO_LENGTH_INTERVALS) {
if (zeroLengthIntervals == 0) {
LOG.info("No input regions had length zero, so none were skipped.");
} else {
LOG.info(String.format("Skipped writing a total of %d entries with length zero in the input file.", zeroLengthIntervals));
}
} else {
if (zeroLengthIntervals > 0) {
LOG.warn(String.format("Input file had %d entries with length zero. Run with the KEEP_ZERO_LENGTH_INTERVALS flag set to false to remove these.", zeroLengthIntervals));
}
}

// Sort and write the output
IntervalList out = intervalList;
if (SORT) {
Expand Down
23 changes: 18 additions & 5 deletions src/test/java/picard/util/BedToIntervalListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class BedToIntervalListTest {

private static final String TEST_DATA_DIR = "testdata/picard/util/BedToIntervalListTest";

private void doTest(final String inputBed, final String header) throws IOException, SAMException {
private void doTest(final String inputBed, final String header, boolean keepZeroLength) throws IOException, SAMException {
final File outputFile = File.createTempFile("bed_to_interval_list_test.", ".interval_list");
outputFile.deleteOnExit();
final BedToIntervalList program = new BedToIntervalList();
Expand All @@ -25,6 +25,7 @@ private void doTest(final String inputBed, final String header) throws IOExcepti
program.SEQUENCE_DICTIONARY = new File(TEST_DATA_DIR, header);
program.OUTPUT = outputFile;
program.UNIQUE = true;
program.KEEP_ZERO_LENGTH_INTERVALS = keepZeroLength;
program.doWork();

// Assert they are equal
Expand All @@ -34,25 +35,30 @@ private void doTest(final String inputBed, final String header) throws IOExcepti

@Test(dataProvider = "testBedToIntervalListDataProvider")
public void testBedToIntervalList(final String inputBed) throws IOException {
doTest(inputBed, "header.sam");
doTest(inputBed, "header.sam", true);
}

// test a fixed bed file using different dictionaries
@Test(dataProvider = "testBedToIntervalListSequenceDictionaryDataProvider")
public void testBedToIntervalListSequenceDictionary(final String dictionary) throws IOException {
doTest("seq_dict_test.bed", dictionary);
doTest("seq_dict_test.bed", dictionary, true);
}

// test for bad dictionaries - we expect these to throw exceptions
@Test(dataProvider = "testBedToIntervalListSequenceDictionaryBadDataProvider",
expectedExceptions = {SAMException.class, PicardException.class})
public void testBedToIntervalListBadSequenceDictionary(final String dictionary) throws IOException {
doTest("seq_dict_test.bed", dictionary);
doTest("seq_dict_test.bed", dictionary, true);
}

@Test(dataProvider = "testBedToIntervalListOutOfBoundsDataProvider", expectedExceptions = PicardException.class)
public void testBedToIntervalListOutOfBounds(final String inputBed) throws IOException {
doTest(inputBed, "header.sam");
doTest(inputBed, "header.sam", true);
}

/**
 * Converts the supplied bed file with {@code KEEP_ZERO_LENGTH_INTERVALS} turned off and
 * lets {@code doTest} compare the result against the expected interval list.
 *
 * @param inputBed name of the bed file handed to {@code doTest}
 * @throws IOException propagated from {@code doTest}
 */
@Test(dataProvider = "testZeroLengthIntervalsSkippedProvider")
public void testZeroLengthIntervalsSkipped(final String inputBed) throws IOException {
    // Named flag instead of a bare literal so the intent of the third argument is obvious.
    final boolean keepZeroLength = false;
    doTest(inputBed, "header.sam", keepZeroLength);
}

@DataProvider
Expand Down Expand Up @@ -103,4 +109,11 @@ public Object[][] testBedToIntervalListOutOfBoundsDataProvider() {
{"off_by_one_interval.bed"}
};
}

/**
 * Data provider for {@code testZeroLengthIntervalsSkipped}.
 *
 * @return a single row holding the name of the bed file used by that test
 */
@DataProvider
public Object[][] testZeroLengthIntervalsSkippedProvider() {
    final Object[] zeroLengthCase = {"zero_length_test.bed"};
    return new Object[][]{zeroLengthCase};
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
chr1 1000 1000
chr2 1000 2000
chr3 1000 1000
chr4 1000 2000
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@HD VN:1.6 SO:coordinate
@SQ SN:chr1 LN:1000000
@SQ SN:chr2 LN:1000000
@SQ SN:chr3 LN:1000000
@SQ SN:chr4 LN:1000000
@SQ SN:chr5 LN:1000000
@SQ SN:chr6 LN:1000000
@SQ SN:chr7 LN:1000000
@SQ SN:chr8 LN:1000000
chr2 1001 2000 + .
chr4 1001 2000 + .
Loading