Skip to content

Commit

Permalink
Reworked subset.c (#3170)
Browse files Browse the repository at this point in the history
  • Loading branch information
mattdowle authored Dec 1, 2018
1 parent e2b1853 commit 6054d7f
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 233 deletions.
14 changes: 7 additions & 7 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -690,11 +690,12 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
else stop("i evaluates to a logical vector length ", length(i), " but there are ", nrow(x), " rows. Recycling of logical i is no longer allowed as it hides more bugs than is worth the rare convenience. Explicitly use rep(...,length=.N) if you really need to recycle.")
} else {
irows = as.integer(i) # e.g. DT[c(1,3)] and DT[c(-1,-3)] ok but not DT[c(1,-3)] (caught as error)
irows = .Call(CconvertNegativeIdx, irows, nrow(x)) # simplifies logic from here on (can assume positive subscripts)
# maintains Arun's fix for #2697 (test 1042)
# efficient in C with more detailed messages
# falls through quickly (no R level allocs) if no negatives
# minor TO DO: can we merge this with check_idx in fcast.c/subset ?
irows = .Call(CconvertNegAndZeroIdx, irows, nrow(x), is.null(jsub) || root!=":=") # last argument is allowOverMax (NA when selecting, error when assigning)
# simplifies logic from here on: can assume positive subscripts (no zeros)
# maintains Arun's fix for #2697 (test 1042)
# efficient in C with more detailed helpful messages when user mixes positives and negatives
# falls through quickly (no R level allocs) if all items are within range [1,max] with no zeros or negatives
# minor TO DO: can we merge this with check_idx in fcast.c/subset ?
}
}
if (notjoin) {
Expand Down Expand Up @@ -1789,8 +1790,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
grporder = o__
# for #971, added !GForce. if (GForce) we do it much more (memory) efficiently than subset of order vector below.
if (length(irows) && !isTRUE(irows) && !GForce) {
# fix for bug #2758. TO DO: provide a better error message
if (length(irows) > 1L && length(zo__ <- which(irows == 0)) > 0L) stop("i[", zo__[1L], "] is 0. While grouping, i=0 is allowed when it's the only value. When length(i) > 1, all i should be > 0.")
# any zeros in irows were removed by convertNegAndZeroIdx earlier above; no need to check for zeros again. Test 1058-1061 check case #2758.
if (length(o__) && length(irows)!=length(o__)) stop("Internal error: length(irows)!=length(o__)") # nocov
o__ = if (length(o__)) irows[o__] # better do this once up front (even though another alloc) than deep repeated branch in dogroups.c
else irows
Expand Down
19 changes: 11 additions & 8 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -2963,11 +2963,14 @@ test(1041, rbindlist(l), data.table(i1=factor(letters[1:5]),val=1:5))

# negative indexing in *i* leads to crash/wrong aggregates when dogroups is called. bug #2697
DT = data.table(x = c(1,2,3,4,5), group = c(1,1,2,2,3))
test(1042, DT[-5, mean(x), by = group], data.table(group=c(1,2), V1=c(1.5, 3.5)))
test(1042.1, DT[-5, mean(x), by = group], data.table(group=c(1,2), V1=c(1.5, 3.5)))
# Test when abs(negative index) > nrow(dt) - should warn
test(1042.1, DT[-10], DT, warning="Item 1 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 1.")
test(1042.2, DT[c(-5, -10), mean(x), by = group], data.table(group=c(1,2),V1=c(1.5,3.5)), warning="Item 2 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 2.")
test(1043, DT[c(1, -5)], error="Cannot mix positives and negatives.")
test(1042.2, DT[-10], DT, warning="Item 1 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 1.")
test(1042.3, DT[c(-5, -10), mean(x), by = group], data.table(group=c(1,2),V1=c(1.5,3.5)), warning="Item 2 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 2.")
test(1042.4, DT[c(-5, -4, -5)], DT[1:3], warning="Item 3 of i is -5 which removes that item but that has occurred before. Ignoring this dup and 0 other dup")
test(1042.5, DT[c(-5, -4, -5, -5, -4)], DT[1:3], warning="Item 3 of i is -5 which removes that item but that has occurred before. Ignoring this dup and 2 other dup")
test(1043.1, DT[c(1, -5)], error="Item 2 of i is -5 and item 1 is 1. Cannot mix positives and negatives.")
test(1043.2, DT[c(-1, NA)], error="Item 1 of i is -1 and item 2 is NA. Cannot mix negatives and NA.")

# crash (floating point exception), when assigning null data.table() to multiple cols, #4731
DT = data.table(x=1:5,y=6:10)
Expand Down Expand Up @@ -2998,10 +3001,10 @@ test(1057, print(DT, digits=2, big.mark=","), output=" x y logy\n1: a

# bug #2758 fix - segfault with zeros in i and factors in by
x <- data.table(letters=letters[1:5], factor=factor(letters[1:5]), number=1:5)
test(1058, x[c(0, 3), list(letters, number), by=factor], error="While grouping, i=0 is allowed")
test(1059, x[c(3, 0), list(letters, number), by=factor], error="While grouping, i=0 is allowed")
test(1060, x[c(0, 3), number:=5L, by=factor], error="While grouping, i=0 is allowed")
test(1061, x[c(0, 3), number:=5L], data.table(letters=letters[1:5], factor=factor(letters[1:5]), number=c(1:2,5L,4:5)))
test(1058, x[c(0, 3), list(letters, number), by=factor], ans<-x[3,c(2,1,3)])
test(1059, x[c(3, 0), list(letters, number), by=factor], ans)
test(1060, x[c(0, 3), number:=5L, by=factor], ans<-data.table(letters=letters[1:5], factor=factor(letters[1:5]), number=c(1:2,5L,4:5)))
test(1061, x[c(0, 3), number:=5L], ans)

# bug #2440 fix - seqfault when j refers to grouping variable when results are empty
DT = data.table(x=rep(c("a","b"),each=3),v=c(42,42,42,4,5,6))
Expand Down
4 changes: 2 additions & 2 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ SEXP binary();
SEXP chmatch2();
SEXP subsetDT();
SEXP subsetVector();
SEXP convertNegativeIdx();
SEXP convertNegAndZeroIdx();
SEXP frank();
SEXP dt_na();
SEXP lookup();
Expand Down Expand Up @@ -127,7 +127,7 @@ R_CallMethodDef callMethods[] = {
{"Cchmatch2", (DL_FUNC) &chmatch2, -1},
{"CsubsetDT", (DL_FUNC) &subsetDT, -1},
{"CsubsetVector", (DL_FUNC) &subsetVector, -1},
{"CconvertNegativeIdx", (DL_FUNC) &convertNegativeIdx, -1},
{"CconvertNegAndZeroIdx", (DL_FUNC) &convertNegAndZeroIdx, -1},
{"Cfrank", (DL_FUNC) &frank, -1},
{"Cdt_na", (DL_FUNC) &dt_na, -1},
{"Clookup", (DL_FUNC) &lookup, -1},
Expand Down
Loading

0 comments on commit 6054d7f

Please sign in to comment.