-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Column iterators should check the number of rows (#432)
* add another empty file for testing * add empty file tests with varied ordering * check that idx has rows before creating col iterator * update debug file * add test for all empty multi file * check rows exist before changing iterator * modify variable names * don't need this test file anymore * use inline test examples * restyle test file * add NEWS bullet
- Loading branch information
Showing
4 changed files
with
83 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,28 @@ | ||
devtools::clean_dll() | ||
devtools::load_all() | ||
|
||
# https://github.com/tidyverse/vroom/issues/429 | ||
x <- vroom::vroom("investigations/my_file.CSV") # often segfaults here | ||
tail(x[,12:15]) | ||
# Setup | ||
|
||
# here's where the problem is, when it occurs | ||
# delimited_index.cc line 416 | ||
# has_quote = *begin == quote_; | ||
# EXC_BAD_ACCESS | ||
# in the bad situation, begin == end and both are `\0` | ||
# happens when file is missing a final newline AND is missing the final field | ||
# the pointers to this field's beginning and end are the same and are | ||
# essentially past (at) the end of the file | ||
# create the files however you'd prefer | ||
# but they can't be temp files | ||
destdir <- "../think_tank/testFiles/" | ||
|
||
# the segfault always seems to happen in RStudio | ||
#brio::write_lines("a", file.path(destdir, "no-rows.tsv")) | ||
#brio::write_lines(c("a","x"), file.path(destdir, "one-row.tsv")) | ||
|
||
# once I'm in lldb inside VS Code, the bug usually goes away, i.e. dereferencing | ||
# *begin is OK | ||
# but ultimately it is clear which line is the problem (see above) | ||
# Debugging | ||
|
||
# if I specify the last col_type, reading (appears to) work | ||
# x <- vroom::vroom( | ||
# "investigations/my_file.CSV", | ||
# col_types = list(Average_SOC_Required = "n") | ||
# ) | ||
# tail(x[,12:15]) # but now it crashes here | ||
devtools::clean_dll() | ||
devtools::load_all() | ||
|
||
# can I make this happen with a smaller file if I change | ||
# guess_max or the buffer size? nope, I never succeeded in doing this | ||
|
||
# small example to better understand the index | ||
# writeChar("X,Y\r\na,bbb\r\ne,", "investigations/mini.csv", eos = NULL) | ||
# | ||
# | | | idx_[0] | ||
# | | | | | | idx_[1] | ||
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | ||
# X , Y \r \n a , b b b \r \n e , | ||
# also, when I'm in debug mode in VSCode | ||
# if I have altrep = TRUE (the default) it doesn't segfault | ||
# but this is only true when I'm using the debugger in VSCode | ||
# I've also been setting num_threads to 1 to make it easier to debug | ||
vroom::vroom( | ||
file.path(destdir, c("no-rows.tsv", "one-row.tsv") | ||
), altrep = FALSE, num_threads = 1, delim = "\t") | ||
|
||
# withr::with_envvar(c("VROOM_CONNECTION_SIZE" = 10), { | ||
# x <- vroom(file("investigations/mini.csv")) | ||
# }) | ||
#vroom::vroom(c( | ||
#"../think_tank/testFiles/also-one-row.tsv", | ||
#"../think_tank/testFiles/no-rows.tsv", | ||
#"../think_tank/testFiles/another-one-row.tsv"), | ||
# altrep = FALSE, num_threads = 1) |