From bdfec8d26741d9568bd4ac61f32095bfae512a51 Mon Sep 17 00:00:00 2001 From: venom1204 Date: Thu, 10 Jul 2025 07:15:12 +0000 Subject: [PATCH 1/6] updated example --- vignettes/datatable-reshape.Rmd | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index 68ef57e6cf..af13ba6591 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -142,18 +142,18 @@ So far we've seen features of `melt` and `dcast` that are implemented efficientl However, there are situations we might run into where the desired operation is not expressed in a straightforward manner. For example, consider the `data.table` shown below: ```{r} -s2 <- "family_id age_mother dob_child1 dob_child2 dob_child3 gender_child1 gender_child2 gender_child3 -1 30 1998-11-26 2000-01-29 NA 1 2 NA -2 27 1996-06-22 NA NA 2 NA NA -3 26 2002-07-11 2004-04-05 2007-09-02 2 2 1 -4 32 2004-10-10 2009-08-27 2012-07-21 1 1 1 -5 29 2000-12-05 2005-02-28 NA 2 1 NA" +s2 <- "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 +1 30 'Ben' 'Anna' NA 1 2 NA +2 27 'Tom' NA NA 2 NA NA +3 26 'Lia' 'Sam' 'Amy' 2 2 1 +4 32 'Max' 'Zoe' 'Joe' 1 1 1 +5 29 'Dan' 'Eva' NA 2 1 NA" DT <- fread(s2) DT ## 1 = female, 2 = male ``` -And you'd like to combine (`melt`) all the `dob` columns together, and `gender` columns together. Using the old functionality, we could do something like this: +And you'd like to combine (`melt`) all the `name` columns together, and `gender` columns together. Using the old functionality, we could do something like this: ```{r} DT.m1 = melt(DT, id.vars = c("family_id", "age_mother")) @@ -161,12 +161,12 @@ DT.m1[, c("variable", "child") := tstrsplit(variable, "_", fixed = TRUE)] DT.c1 = dcast(DT.m1, family_id + age_mother + child ~ variable, value.var = "value") DT.c1 -str(DT.c1) ## gender column is class IDate now! +str(DT.c1) ## gender column is character type now! ``` #### Issues -1. What we wanted to do was to combine all the `dob` and `gender` type columns together respectively. Instead, we are combining *everything* together, and then splitting them again. I think it's easy to see that it's quite roundabout (and inefficient). +1. What we wanted to do was to combine all the `name` and `gender` type columns together respectively. Instead, we are combining *everything* together, and then splitting them again. I think it's easy to see that it's quite roundabout (and inefficient). As an analogy, imagine you've a closet with four shelves of clothes and you'd like to put together the clothes from shelves 1 and 2 together (in 1), and 3 and 4 together (in 3). What we are doing is more or less to combine all the clothes together, and then split them back on to shelves 1 and 3! @@ -189,9 +189,9 @@ Since we'd like for `data.table`s to perform this operation straightforward and The idea is quite simple. We pass a list of columns to `measure.vars`, where each element of the list contains the columns that should be combined together. ```{r} -colA = paste0("dob_child", 1:3) +colA = paste0("name_child", 1:3) colB = paste0("gender_child", 1:3) -DT.m2 = melt(DT, measure.vars = list(colA, colB), value.name = c("dob", "gender")) +DT.m2 = melt(DT, measure.vars = list(colA, colB), value.name = c("name", "gender")) DT.m2 str(DT.m2) ## col type is preserved @@ -206,7 +206,7 @@ str(DT.m2) ## col type is preserved Usually in these problems, the columns we'd like to melt can be distinguished by a common pattern. We can use the function `patterns()`, implemented for convenience, to provide regular expressions for the columns to be combined together. The above operation can be rewritten as: ```{r} -DT.m2 = melt(DT, measure.vars = patterns("^dob", "^gender"), value.name = c("dob", "gender")) +DT.m2 = melt(DT, measure.vars = patterns("^name", "^gender"), value.name = c("name", "gender")) DT.m2 ``` @@ -305,7 +305,7 @@ We can now provide **multiple `value.var` columns** to `dcast` for `data.table`s ```{r} ## new 'cast' functionality - multiple value.vars -DT.c2 = dcast(DT.m2, family_id + age_mother ~ variable, value.var = c("dob", "gender")) +DT.c2 = dcast(DT.m2, family_id + age_mother ~ variable, value.var = c("name", "gender")) DT.c2 ``` From b051ad351e19a7aef14f2c6233a910e0ebff0585 Mon Sep 17 00:00:00 2001 From: venom1204 Date: Thu, 10 Jul 2025 16:51:31 +0530 Subject: [PATCH 2/6] Update datatable-reshape.Rmd --- vignettes/datatable-reshape.Rmd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index af13ba6591..5554695d25 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -142,13 +142,13 @@ So far we've seen features of `melt` and `dcast` that are implemented efficientl However, there are situations we might run into where the desired operation is not expressed in a straightforward manner. For example, consider the `data.table` shown below: ```{r} -s2 <- "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 -1 30 'Ben' 'Anna' NA 1 2 NA -2 27 'Tom' NA NA 2 NA NA -3 26 'Lia' 'Sam' 'Amy' 2 2 1 -4 32 'Max' 'Zoe' 'Joe' 1 1 1 -5 29 'Dan' 'Eva' NA 2 1 NA" -DT <- fread(s2) +s2 = "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 + 1 30 Ben Anna NA 1 2 NA + 2 27 Tom NA NA 2 NA NA + 3 26 Lia Sam Amy 2 2 1 + 4 32 Max Zoe Joe 1 1 1 + 5 29 Dan Eva NA 2 1 NA" +DT = fread(s2) DT ## 1 = female, 2 = male ``` From 21f7a7b35a548974ade8e5fecb6a77493a8ddecc Mon Sep 17 00:00:00 2001 From: venom1204 Date: Thu, 10 Jul 2025 17:05:49 +0530 Subject: [PATCH 3/6] Update datatable-reshape.Rmd --- vignettes/datatable-reshape.Rmd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index 5554695d25..695f4d95bd 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -142,13 +142,13 @@ So far we've seen features of `melt` and `dcast` that are implemented efficientl However, there are situations we might run into where the desired operation is not expressed in a straightforward manner. For example, consider the `data.table` shown below: ```{r} -s2 = "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 - 1 30 Ben Anna NA 1 2 NA - 2 27 Tom NA NA 2 NA NA - 3 26 Lia Sam Amy 2 2 1 - 4 32 Max Zoe Joe 1 1 1 - 5 29 Dan Eva NA 2 1 NA" -DT = fread(s2) +s2 <- "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 +1 30 Ben Anna NA 1 2 NA +2 27 Tom NA NA 2 NA NA +3 26 Lia Sam Amy 2 2 1 +4 32 Max Zoe Joe 1 1 1 +5 29 Dan Eva NA 2 1 NA" +DT <- fread(s2) DT ## 1 = female, 2 = male ``` From 127f79ddd9c6ad51a2fb0f5f74d0dd5ce3e6a557 Mon Sep 17 00:00:00 2001 From: venom1204 Date: Thu, 10 Jul 2025 17:12:22 +0530 Subject: [PATCH 4/6] indentation --- vignettes/datatable-reshape.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index 695f4d95bd..55b8ea36bf 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -142,13 +142,13 @@ So far we've seen features of `melt` and `dcast` that are implemented efficientl However, there are situations we might run into where the desired operation is not expressed in a straightforward manner. For example, consider the `data.table` shown below: ```{r} -s2 <- "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 +s2 = "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 1 30 Ben Anna NA 1 2 NA 2 27 Tom NA NA 2 NA NA 3 26 Lia Sam Amy 2 2 1 4 32 Max Zoe Joe 1 1 1 5 29 Dan Eva NA 2 1 NA" -DT <- fread(s2) +DT = fread(s2) DT ## 1 = female, 2 = male ``` From 781a123f5a764055ca0b389fa490add0c9624028 Mon Sep 17 00:00:00 2001 From: venom1204 Date: Thu, 10 Jul 2025 17:21:54 +0530 Subject: [PATCH 5/6] Update datatable-reshape.Rmd --- vignettes/datatable-reshape.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index 55b8ea36bf..6495736bad 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -143,11 +143,11 @@ However, there are situations we might run into where the desired operation is n ```{r} s2 = "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 -1 30 Ben Anna NA 1 2 NA -2 27 Tom NA NA 2 NA NA -3 26 Lia Sam Amy 2 2 1 -4 32 Max Zoe Joe 1 1 1 -5 29 Dan Eva NA 2 1 NA" +1 30 Ben Anna NA 1 2 NA +2 27 Tom NA NA 2 NA NA +3 26 Lia Sam Amy 2 2 1 +4 32 Max Zoe Joe 1 1 1 +5 29 Dan Eva NA 2 1 NA" DT = fread(s2) DT ## 1 = female, 2 = male From 3107df7ebdebc079448f7c8f964e7237fe3b6d61 Mon Sep 17 00:00:00 2001 From: venom1204 Date: Fri, 11 Jul 2025 04:03:52 +0530 Subject: [PATCH 6/6] modified to right justification --- vignettes/datatable-reshape.Rmd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index 6495736bad..ba8758b0bb 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -142,13 +142,13 @@ So far we've seen features of `melt` and `dcast` that are implemented efficientl However, there are situations we might run into where the desired operation is not expressed in a straightforward manner. For example, consider the `data.table` shown below: ```{r} -s2 = "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 -1 30 Ben Anna NA 1 2 NA -2 27 Tom NA NA 2 NA NA -3 26 Lia Sam Amy 2 2 1 -4 32 Max Zoe Joe 1 1 1 -5 29 Dan Eva NA 2 1 NA" -DT = fread(s2) +s2 <- "family_id age_mother name_child1 name_child2 name_child3 gender_child1 gender_child2 gender_child3 + 1 30 Ben Anna NA 1 2 NA + 2 27 Tom NA NA 2 NA NA + 3 26 Lia Sam Amy 2 2 1 + 4 32 Max Zoe Joe 1 1 1 + 5 29 Dan Eva NA 2 1 NA" +DT <- fread(s2) DT ## 1 = female, 2 = male ```