-
Couldn't load subscription status.
- Fork 1k
Closed
Description
Hi,
After upgrading to today’s dev version from GitHub, some of my merges stopped working correctly. It took me some time to track down; it seems to be connected to fread and to using a key with two columns. With previous dev versions of 1.9.5 it worked, so I think it is connected to 14e39e4 or 44b1e00.
Reproduce with:
library('data.table')
(x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1, 3, 2)))
write.csv(x1, 'x1.csv', row.names = FALSE)
(x2 <- fread('x1.csv'))
y <- data.table(a2 = 1:3)
setkey(y, a2)
setkey(x1, a1, a2)
setkey(x2, a1, a2)
merge(x1, y) # OK: 3 rows
merge(x2, y) # FAIL: 2 rows
merge(x2, y, by = 'a2') # FAIL: 2 rows
setkey(x2, a2)
merge(x2, y) # OK: 3 rows
# different behaviour with character keys!
(x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c('a', 'c', 'b')))
write.csv(x1, 'x1.csv', row.names = FALSE)
(x2 <- fread('x1.csv'))
y <- data.table(a2 = c('a', 'b', 'c'))
setkey(y, a2)
setkey(x1, a1, a2)
setkey(x2, a1, a2)
merge(x1, y) # FAIL: 2 rows
merge(x2, y) # FAIL: 2 rows
merge(x2, y, by = 'a2') # FAIL: 2 rows
setkey(x2, a2)
merge(x2, y) # OK: 3 rows
This is my output:
> library('data.table')
data.table 1.9.5 For help type ?data.table or https://github.com/Rdatatable/data.table/wiki
> (x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1, 3, 2)))
a1 a2
1: a 1
2: b 3
3: c 2
> write.csv(x1, 'x1.csv', row.names = FALSE)
> (x2 <- fread('x1.csv'))
a1 a2
1: a 1
2: b 3
3: c 2
> y <- data.table(a2 = 1:3)
> setkey(y, a2)
> setkey(x1, a1, a2)
> setkey(x2, a1, a2)
> merge(x1, y) # OK: 3 rows
a2 a1
1: 1 a
2: 2 c
3: 3 b
> merge(x2, y) # FAIL: 2 rows
a2 a1
1: 1 a
2: 3 b
> merge(x2, y, by = 'a2') # FAIL: 2 rows
a2 a1
1: 1 a
2: 3 b
> setkey(x2, a2)
> merge(x2, y) # OK: 3 rows
a2 a1
1: 1 a
2: 2 c
3: 3 b
> # different behaviour with character keys!
>
> (x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c('a', 'c', 'b')))
a1 a2
1: a a
2: b c
3: c b
> write.csv(x1, 'x1.csv', row.names = FALSE)
> (x2 <- fread('x1.csv'))
a1 a2
1: a a
2: b c
3: c b
> y <- data.table(a2 = c('a', 'b', 'c'))
> setkey(y, a2)
> setkey(x1, a1, a2)
> setkey(x2, a1, a2)
> merge(x1, y) # FAIL: 2 rows
a2 a1
1: a a
2: c b
> merge(x2, y) # FAIL: 2 rows
a2 a1
1: a a
2: c b
> merge(x2, y, by = 'a2') # FAIL: 2 rows
a2 a1
1: a a
2: c b
> setkey(x2, a2)
> merge(x2, y) # OK: 3 rows
a2 a1
1: a a
2: b c
3: c b
Thanks!
Bela