Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@

6. By-reference sub-assignments of strings to factor columns now _actually_ match the levels in UTF-8 when required and now don't result in invalid factors being created, [#7648](https://github.com/Rdatatable/data.table/issues/7648), amending a previous incomplete fix to [#6886](https://github.com/Rdatatable/data.table/issues/6886) in v1.17.2. Thanks @BASS-JN for the report and @aitap for the fix.

7. `fcoalesce()` and `setcoalesce()` could fail for inputs during implicit type coercions when items had different but still compatible underlying storage types (e.g., `Date` and `IDate`), #7545 (https://github.com/Rdatatable/data.table/issues/7545). This was particularly unexpected because `Date` objects may be stored as either integer or double. Thanks to @ethanbsmith for the report and @ben-schwen for the fix.

### Notes

1. {data.table} now depends on R 3.5.0 (2018).
Expand Down
8 changes: 8 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15140,6 +15140,8 @@ date = as.Date(int, origin="1970-01-01")
date_val = as.Date(int_val, origin="1970-01-01")
idate = as.IDate(int, origin="1970-01-01")
idate_val = as.IDate(int_val, origin="1970-01-01")
date_int = .Date(3L)
date_int_val = .Date(int_val)
itime = as.ITime(int)
itime_val = as.ITime(int_val)
posix = as.POSIXct(int, origin="1970-01-01")
Expand All @@ -15163,6 +15165,12 @@ test(2060.015, fcoalesce(itime, as.ITime(3L)), itime_val)
test(2060.016, fcoalesce(itime, as.ITime(NA), as.ITime(3L)), itime_val)
test(2060.017, fcoalesce(posix, as.POSIXct(3L, origin="1970-01-01")), posix_val)
test(2060.018, fcoalesce(posix, as.POSIXct(NA_integer_, origin="1970-01-01"), as.POSIXct(3L, origin="1970-01-01")), posix_val)
# allow mixed underlying types of date class, as long as they can be coerced to the same class #7545
test(2060.019, fcoalesce(date, date_int), date_val)
test(2060.020, fcoalesce(date, as.IDate("1970-01-04")), date_val)
test(2060.021, fcoalesce(idate, date_val), idate_val)
test(2060.022, typeof(fcoalesce(date, date_int_val)), "double")
test(2060.023, typeof(fcoalesce(idate, date_val)), "integer")
# vector replacements
test(2060.051, fcoalesce(bool, rep(TRUE, 3L)), bool_val)
test(2060.052, fcoalesce(bool, rep(NA, 3L), rep(TRUE, 3L)), bool_val)
Expand Down
34 changes: 26 additions & 8 deletions src/coalesce.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@
- The replacement of NAs with non-NA values from subsequent vectors
- The conditional checks within parallelized loops
*/
static bool compatibleDateTypes(SEXP x, SEXP y) {
if (!INHERITS(x, char_Date) || !INHERITS(y, char_Date)) return false;
const SEXPTYPE xType = TYPEOF(x), yType = TYPEOF(y);
if ((xType != INTSXP && xType != REALSXP) || (yType != INTSXP && yType != REALSXP)) return false;
if (xType == INTSXP && yType == REALSXP && !fitsInInt32(y)) return false;
return true;
}

SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
if (TYPEOF(x)!=VECSXP) internal_error(__func__, "input is list(...) at R level"); // # nocov
if (!IS_TRUE_OR_FALSE(inplaceArg)) internal_error(__func__, "argument 'inplaceArg' must be TRUE or FALSE"); // # nocov
Expand All @@ -31,6 +39,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
if (nval==0) return first;
const bool factor = isFactor(first);
const int nrow = length(first);
SEXP items = PROTECT(allocVector(VECSXP, nval)); nprotect++;
for (int i=0; i<nval; ++i) {
SEXP item = VECTOR_ELT(x, i+off);
if (factor) {
Expand All @@ -43,13 +52,22 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
if (isFactor(item))
error(_("Item %d is a factor but item 1 is not a factor. When factors are involved, all items must be factor."), i+2);
}
bool sameClass = R_compute_identical(PROTECT(getAttrib(first, R_ClassSymbol)), PROTECT(getAttrib(item, R_ClassSymbol)), 0);
UNPROTECT(2);
const bool dateCompatible = compatibleDateTypes(first, item);
if (dateCompatible && (TYPEOF(first) != TYPEOF(item) || !sameClass)) {
SEXP coerced = PROTECT(coerceAs(item, first, ScalarLogical(TRUE)));
item = coerced;
sameClass = true;
UNPROTECT(1);
}
if (TYPEOF(first) != TYPEOF(item))
error(_("Item %d is type %s but the first item is type %s. Please coerce before coalescing."), i+2, type2char(TYPEOF(item)), type2char(TYPEOF(first)));
if (!R_compute_identical(PROTECT(getAttrib(first, R_ClassSymbol)), PROTECT(getAttrib(item, R_ClassSymbol)), 0))
if (!sameClass)
error(_("Item %d has a different class than item 1."), i+2);
UNPROTECT(2);
if (length(item)!=1 && length(item)!=nrow)
error(_("Item %d is length %d but the first item is length %d. Only singletons are recycled."), i+2, length(item), nrow);
SET_VECTOR_ELT(items, i, item);
}
if (!inplace) {
first = PROTECT(copyAsPlain(first)); nprotect++;
Expand All @@ -61,7 +79,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
case INTSXP: {
int *xP = INTEGER(first), k=0, finalVal=NA_INTEGER;
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
SEXP item = VECTOR_ELT(items, j);
if (length(item)==1) {
int tt = INTEGER(item)[0];
if (tt==NA_INTEGER) continue; // singleton NA can be skipped
Expand All @@ -84,7 +102,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
int64_t *xP=(int64_t *)REAL(first), finalVal=NA_INTEGER64;
int k=0;
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
SEXP item = VECTOR_ELT(items, j);
if (length(item)==1) {
int64_t tt = ((int64_t *)REAL(item))[0];
if (tt==NA_INTEGER64) continue;
Expand All @@ -106,7 +124,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
int k=0;
if (nan_is_na) {
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
SEXP item = VECTOR_ELT(items, j);
if (length(item)==1) {
double tt = REAL(item)[0];
if (ISNAN(tt)) continue;
Expand All @@ -125,7 +143,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
}
} else {
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
SEXP item = VECTOR_ELT(items, j);
if (length(item)==1) {
double tt = REAL(item)[0];
if (ISNA(tt)) continue;
Expand All @@ -149,7 +167,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
Rcomplex *xP = COMPLEX(first), finalVal=NA_CPLX;
int k=0;
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
SEXP item = VECTOR_ELT(items, j);
if (length(item)==1) {
Rcomplex tt = COMPLEX(item)[0];
if (ISNAN(tt.r) && ISNAN(tt.i)) continue;
Expand All @@ -172,7 +190,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
SEXP finalVal=NA_STRING;
int k=0;
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
SEXP item = VECTOR_ELT(items, j);
if (length(item)==1) {
SEXP tt = STRING_ELT(item,0);
if (tt==NA_STRING) continue;
Expand Down
Loading