From 05465b04de88b4f68377ee2184a44b7fdc262eae Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sat, 17 Feb 2018 11:40:14 +1100
Subject: [PATCH 01/14] Fixed duplicate column names in merge when by.x in
 names(y)

---
 R/merge.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/R/merge.R b/R/merge.R
index ceb87c75e9..fec29b3786 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -51,6 +51,13 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA
     start[chmatch(dupnames, start, 0L)] = paste(dupnames, suffixes[1L], sep="")
     end[chmatch(dupnames, end, 0L)] = paste(dupnames, suffixes[2L], sep="")
   }
+  # If by.x != by.y then the 'by' column(s) are named as 'by.x' - we need
+  # to also handle cases where the 'by.x' column names are in 'end'
+  dupkeyx = intersect(by.x, end)
+  if (length(dupkeyx)) {
+    by.x[chmatch(dupkeyx, by.x, 0L)] = paste(dupkeyx, suffixes[1L], sep="")
+    end[chmatch(dupkeyx, end, 0L)] = paste(dupkeyx, suffixes[2L], sep="")
+  }
 
   dt = y[x,nomatch = if (all.x) NA else 0L,on=by,allow.cartesian=allow.cartesian]   # includes JIS columns (with a i. prefix if conflict with x names)
 

From 79653e7f2dceb2e1b33f6f03cb66bff30e474936 Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sat, 17 Feb 2018 12:00:50 +1100
Subject: [PATCH 02/14] added test

---
 inst/tests/tests.Rraw | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index c9537b91b4..75136e1fdb 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11671,6 +11671,14 @@ unlink(f)
 test(1876, fread("http://hkhfsk\nhttp://fhdkf\nhttp://kjfhskd\nhttp://hfkjf", header=FALSE),    # data not a download, #2531
            data.table(V1=c("http://hkhfsk","http://fhdkf","http://kjfhskd","http://hfkjf")))
 
+# PR #2631 - Fix duplicated names arising in merge when by.x in names(y)
+# Test should fail in there are any duplicate names after a join
+parents = data.table(name=c("Sarah", "Max"), sex=c("F", "M"), age=c(41, 43))
+children = data.table(parent=c("Sarah", "Max", "Max"), 
+                       name=c("Oliver", "Sebastian", "Michelle"),
+                       sex=c("M", "M", "F"), age=c(5,8,7))
+joined = merge(parents, children, by.x="name", by.y="parent")                       
+test(1877, length(names(joined)), length(unique(names(joined)))) 
 
 ##########################
 
@@ -11716,4 +11724,3 @@ cat("\n",plat,"\n\nAll ",ntest," tests in inst/tests/tests.Rraw completed ok in
 # date() is included so we can tell when CRAN checks were run (in particular if they have been rerun since
 # an update to Rdevel itself; data.table doesn't have any other dependency) since there appears to be no other
 # way to see the timestamp that CRAN checks were run. Some CRAN machines lag by several days.
-

From 091251d787ec97d8390934cdcf01cadce82a7b23 Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sat, 17 Feb 2018 12:14:00 +1100
Subject: [PATCH 03/14] added NEWS entry

---
 NEWS.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 55fa516c90..880645c6e2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -138,6 +138,9 @@ Thanks to @MichaelChirico for reporting and to @MarkusBonsch for the implementat
 
 32. `x.` prefixes during joins sometimes resulted in a "column not found" error. This is now fixed. Closes [#2313](https://github.com/Rdatatable/data.table/issues/2313). Thanks to @franknarf1 for the MRE.
 
+33. Fixed bug where result of `merge()` would contain duplicate column names if `by.x` was also in `names(y)`.
+Thanks to @sritchie73 for reporting and fixing [PR#2631](https://github.com/Rdatatable/data.table/pull/2631)
+
 #### NOTES
 
 0. The license has been changed from GPL to MPL (Mozilla Public License). All contributors were consulted and approved. [PR#2456](https://github.com/Rdatatable/data.table/pull/2456) details the reasons for the change.

From b6aaa98c3e6f13e24345b2833bd1bf4a1eb943ec Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sat, 17 Feb 2018 12:28:36 +1100
Subject: [PATCH 04/14] merge throws warning when duplicate names in result

---
 NEWS.md               | 3 ++-
 R/merge.R             | 9 +++++++++
 inst/tests/tests.Rraw | 4 +++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 880645c6e2..b7d8fdec82 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -139,7 +139,8 @@ Thanks to @MichaelChirico for reporting and to @MarkusBonsch for the implementat
 32. `x.` prefixes during joins sometimes resulted in a "column not found" error. This is now fixed. Closes [#2313](https://github.com/Rdatatable/data.table/issues/2313). Thanks to @franknarf1 for the MRE.
 
 33. Fixed bug where result of `merge()` would contain duplicate column names if `by.x` was also in `names(y)`.
-Thanks to @sritchie73 for reporting and fixing [PR#2631](https://github.com/Rdatatable/data.table/pull/2631)
+Where there are duplicate column names (i.e. `suffixes = c("", "")`) `merge()` will throw a warning to match
+the behaviour of `base:::merge.data.frame()`. Thanks to @sritchie73 for reporting and fixing [PR#2631](https://github.com/Rdatatable/data.table/pull/2631). 
 
 #### NOTES
 
diff --git a/R/merge.R b/R/merge.R
index fec29b3786..64b626295f 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -85,6 +85,15 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA
   if (nrow(dt) > 0L) {
     setkeyv(dt, if (sort) by.x else NULL)
   }
+  
+  # Throw warning if there are duplicate column names in 'dt' (i.e. if
+  # `suffixes=c("","")`, to match behaviour in base:::merge.data.frame)
+  resultdupnames <- names(dt)[duplicated(names(dt))]
+  if (length(resultdupnames)) {
+    warning("column names ", paste0("'", resultdupnames, "'", collapse=", "), 
+            " are duplicated in the result")
+  }
+  
   # merge resets class, #1378. X[Y] is quite clear that X is being *subset* by Y,
   # makes sense to therefore retain X's class, unlike `merge`. Hard to tell what
   # class to retain for *full join* for example.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 75136e1fdb..92b1453485 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11678,7 +11678,9 @@ children = data.table(parent=c("Sarah", "Max", "Max"),
                        name=c("Oliver", "Sebastian", "Michelle"),
                        sex=c("M", "M", "F"), age=c(5,8,7))
 joined = merge(parents, children, by.x="name", by.y="parent")                       
-test(1877, length(names(joined)), length(unique(names(joined)))) 
+test(1877.1, length(names(joined)), length(unique(names(joined)))) 
+test(1877.2, merge(parents, children, by.x="name", by.y="parent", suffixes=c("",""),
+     warning = "column names 'name', 'sex', 'age' are duplicated in the result"), 
 
 ##########################
 

From f9c1380a3ae853c0b23caec32ea08be264fe2d12 Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sat, 17 Feb 2018 14:01:39 +1100
Subject: [PATCH 05/14] syntax typo

---
 inst/tests/tests.Rraw | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 92b1453485..6a801ce17e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11679,8 +11679,8 @@ children = data.table(parent=c("Sarah", "Max", "Max"),
                        sex=c("M", "M", "F"), age=c(5,8,7))
 joined = merge(parents, children, by.x="name", by.y="parent")                       
 test(1877.1, length(names(joined)), length(unique(names(joined)))) 
-test(1877.2, merge(parents, children, by.x="name", by.y="parent", suffixes=c("",""),
-     warning = "column names 'name', 'sex', 'age' are duplicated in the result"), 
+test(1877.2, merge(parents, children, by.x="name", by.y="parent", suffixes=c("","")),
+     warning = "column names 'name', 'sex', 'age' are duplicated in the result")
 
 ##########################
 

From 560b39029684264b8b7453bdc0da9ba5a99c17be Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sun, 18 Feb 2018 12:54:13 +1100
Subject: [PATCH 06/14] warning test

---
 inst/tests/tests.Rraw | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 6a801ce17e..96bb82fcc4 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11680,7 +11680,8 @@ children = data.table(parent=c("Sarah", "Max", "Max"),
 joined = merge(parents, children, by.x="name", by.y="parent")                       
 test(1877.1, length(names(joined)), length(unique(names(joined)))) 
 test(1877.2, merge(parents, children, by.x="name", by.y="parent", suffixes=c("","")),
-     warning = "column names 'name', 'sex', 'age' are duplicated in the result")
+             merge(parents, children, by.x="name", by.y="parent", suffixes=c("","")),
+             warning = "column names.*are duplicated in the result")
 
 ##########################
 

From f81eaa5dd2662d12d3120c9526075cfe325b9818 Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sun, 18 Feb 2018 13:14:09 +1100
Subject: [PATCH 07/14] warning test output comparison no longer generates
 warning

---
 inst/tests/tests.Rraw | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 9d4ab50c2a..d87872116a 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11677,15 +11677,15 @@ test(1877.2, attr(setattr(data.table(x = 1:10), "class", character()), "class"),
 test(1877.3, attr(setattr(data.table(x = 1:10), "test", character()), "test"), character(0)) # ok before
 
 # PR #2631 - Fix duplicated names arising in merge when by.x in names(y)
-# Test should fail in there are any duplicate names after a join
+# 1878.1 should fail in there are any duplicate names after a join
+# 1878.2 should fail if a warning is not thrown when suffixes leads to duplicate names
 parents = data.table(name=c("Sarah", "Max"), sex=c("F", "M"), age=c(41, 43))
 children = data.table(parent=c("Sarah", "Max", "Max"), 
                        name=c("Oliver", "Sebastian", "Michelle"),
                        sex=c("M", "M", "F"), age=c(5,8,7))
 joined = merge(parents, children, by.x="name", by.y="parent")                       
 test(1878.1, length(names(joined)), length(unique(names(joined)))) 
-test(1878.2, merge(parents, children, by.x="name", by.y="parent", suffixes=c("","")),
-             merge(parents, children, by.x="name", by.y="parent", suffixes=c("","")),
+test(1878.2, nrow(merge(parents, children, by.x="name", by.y="parent", suffixes=c("",""))), 3,
              warning = "column names.*are duplicated in the result")
 
 ##########################

From 2ec45262a285c44a0c9ed38e9d6a5bd9aec8cec4 Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sun, 18 Feb 2018 13:24:10 +1100
Subject: [PATCH 08/14] integer vs double failed check fixed

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index d87872116a..9b370943bb 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11685,7 +11685,7 @@ children = data.table(parent=c("Sarah", "Max", "Max"),
                        sex=c("M", "M", "F"), age=c(5,8,7))
 joined = merge(parents, children, by.x="name", by.y="parent")                       
 test(1878.1, length(names(joined)), length(unique(names(joined)))) 
-test(1878.2, nrow(merge(parents, children, by.x="name", by.y="parent", suffixes=c("",""))), 3,
+test(1878.2, nrow(merge(parents, children, by.x="name", by.y="parent", suffixes=c("",""))), 3L,
              warning = "column names.*are duplicated in the result")
 
 ##########################

From 2006b14a059302367cfab0a0d8f836fc3155f0e7 Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Sun, 18 Feb 2018 13:29:49 +1100
Subject: [PATCH 09/14] Removed test 1543.3

This test now fails because the output of merge.data.table does
not match the output of merge.data.frame because base::merge.data.frame
still leads to duplicate column names where by.x is in names(y).
---
 inst/tests/tests.Rraw | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 9b370943bb..ae181504b0 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7154,15 +7154,12 @@ ans2 <- setDF(merge(setDT(d1), setDT(d2), all=TRUE, by="A"))
 test(1542.2, ans1, ans2)
 # test duplicate name cases
 setnames(d2, c("A", "Y"), c("B", "A"))
-ans1 <- suppressWarnings(merge(setDF(d1), setDF(d2), by.x="A", by.y="B"))
-ans2 <- setDF(merge(setDT(d1), setDT(d2), by.x="A", by.y="B"))
-test(1543.3, ans1, ans2)
 ans1 <- suppressWarnings(merge(setDF(d2), setDF(d1), by.x="B", by.y="A"))
 ans2 <- setDF(merge(setDT(d2), setDT(d1), by.x="B", by.y="A"))
-test(1543.4, ans1, ans2)
+test(1543.3, ans1, ans2)
 ans1 <- suppressWarnings(merge(setDF(d2), setDF(d1), all=TRUE, by.x="B", by.y="A"))
 ans2 <- setDF(merge(setDT(d2), setDT(d1), all=TRUE, by.x="B", by.y="A"))
-test(1543.5, ans1, ans2)
+test(1543.4, ans1, ans2)
 
 # test for sort=FALSE argument, #1282
 set.seed(1L)

From 12fb8ac46a280eb6c55fd815801d433adbe4a4fa Mon Sep 17 00:00:00 2001
From: Scott Ritchie <sritchie73@gmail.com>
Date: Mon, 19 Feb 2018 07:32:47 +1100
Subject: [PATCH 10/14] Updated merge documentation

---
 man/merge.Rd | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/man/merge.Rd b/man/merge.Rd
index a2da1fde3c..07e851bcf0 100644
--- a/man/merge.Rd
+++ b/man/merge.Rd
@@ -14,6 +14,10 @@ very similarly to that of \code{data.frame}s except that, by default, it attempt
 }
 
 Set the \code{by}, or \code{by.x} and \code{by.y} arguments explicitly to override this default.
+
+In addition, if any column names provided to \code{by.x} also occur in \code{names(y)} (and
+not in \code{by.y}) then this function will add the \code{suffixes} to those column names,
+while the \code{data.frame} method will not (leading to duplicate column names in the result).
 }
 
 \usage{

From b3d287a49cf85085782f8f2e27542bf1dc899918 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Thu, 22 Feb 2018 16:15:54 -0800
Subject: [PATCH 11/14] Minor wording:  'function' => 'data.table method'

---
 man/merge.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/merge.Rd b/man/merge.Rd
index 07e851bcf0..b1811c7773 100644
--- a/man/merge.Rd
+++ b/man/merge.Rd
@@ -16,7 +16,7 @@ very similarly to that of \code{data.frame}s except that, by default, it attempt
 Set the \code{by}, or \code{by.x} and \code{by.y} arguments explicitly to override this default.
 
 In addition, if any column names provided to \code{by.x} also occur in \code{names(y)} (and
-not in \code{by.y}) then this function will add the \code{suffixes} to those column names,
+not in \code{by.y}) then this \code{data.table} method will add the \code{suffixes} to those column names,
 while the \code{data.frame} method will not (leading to duplicate column names in the result).
 }
 

From 3f79e7d8ac593738fb42104ed4fef99c06768b7f Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Thu, 22 Feb 2018 16:35:39 -0800
Subject: [PATCH 12/14] Added link and status to merge.Rd

---
 man/merge.Rd | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/man/merge.Rd b/man/merge.Rd
index b1811c7773..9c2ccf9e2d 100644
--- a/man/merge.Rd
+++ b/man/merge.Rd
@@ -15,9 +15,11 @@ very similarly to that of \code{data.frame}s except that, by default, it attempt
 
 Set the \code{by}, or \code{by.x} and \code{by.y} arguments explicitly to override this default.
 
-In addition, if any column names provided to \code{by.x} also occur in \code{names(y)} (and
-not in \code{by.y}) then this \code{data.table} method will add the \code{suffixes} to those column names,
-while the \code{data.frame} method will not (leading to duplicate column names in the result).
+In addition, if any column names provided to \code{by.x} also occur in \code{names(y)} but not in \code{by.y})
+  then this \code{data.table} method will add the \code{suffixes} to those column names. As of
+  R v3.4.3, the \code{data.frame} method will not (leading to duplicate column names in the result) but a patch has
+  been proposed (see thread \url{http://r.789695.n4.nabble.com/Duplicate-column-names-created-by-base-merge-when-by-x-has-the-same-name-as-a-column-in-y-td4748345.html}
+  (here)) which looks likely to be accepted for a future version of R.
 }
 
 \usage{

From a483996f67bbbcaabc5cad3f9ce44ad9c28b4fea Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Thu, 22 Feb 2018 16:46:57 -0800
Subject: [PATCH 13/14] Fixed my bracket typo

---
 man/merge.Rd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/merge.Rd b/man/merge.Rd
index 9c2ccf9e2d..95573a970c 100644
--- a/man/merge.Rd
+++ b/man/merge.Rd
@@ -15,11 +15,11 @@ very similarly to that of \code{data.frame}s except that, by default, it attempt
 
 Set the \code{by}, or \code{by.x} and \code{by.y} arguments explicitly to override this default.
 
-In addition, if any column names provided to \code{by.x} also occur in \code{names(y)} but not in \code{by.y})
+In addition, if any column names provided to \code{by.x} also occur in \code{names(y)} but not in \code{by.y},
   then this \code{data.table} method will add the \code{suffixes} to those column names. As of
   R v3.4.3, the \code{data.frame} method will not (leading to duplicate column names in the result) but a patch has
   been proposed (see thread \url{http://r.789695.n4.nabble.com/Duplicate-column-names-created-by-base-merge-when-by-x-has-the-same-name-as-a-column-in-y-td4748345.html}
-  (here)) which looks likely to be accepted for a future version of R.
+  (here)) which is looking likely to be accepted for a future version of R.
 }
 
 \usage{

From 8ad024251dedc15e7a0033a3175a7488a92a3008 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Thu, 22 Feb 2018 17:03:31 -0800
Subject: [PATCH 14/14] Moved new paragraph to Details section.

---
 man/merge.Rd | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/man/merge.Rd b/man/merge.Rd
index 95573a970c..0032a9e849 100644
--- a/man/merge.Rd
+++ b/man/merge.Rd
@@ -14,12 +14,6 @@ very similarly to that of \code{data.frame}s except that, by default, it attempt
 }
 
 Set the \code{by}, or \code{by.x} and \code{by.y} arguments explicitly to override this default.
-
-In addition, if any column names provided to \code{by.x} also occur in \code{names(y)} but not in \code{by.y},
-  then this \code{data.table} method will add the \code{suffixes} to those column names. As of
-  R v3.4.3, the \code{data.frame} method will not (leading to duplicate column names in the result) but a patch has
-  been proposed (see thread \url{http://r.789695.n4.nabble.com/Duplicate-column-names-created-by-base-merge-when-by-x-has-the-same-name-as-a-column-in-y-td4748345.html}
-  (here)) which is looking likely to be accepted for a future version of R.
 }
 
 \usage{
@@ -72,6 +66,12 @@ control for providing the columns to merge on with the help of the newly impleme
 For a more \code{data.table}-centric way of merging two \code{data.table}s, see
 \code{\link{[.data.table}}; e.g., \code{x[y, ...]}. See FAQ 1.12 for a detailed
 comparison of \code{merge} and \code{x[y, ...]}.
+
+If any column names provided to \code{by.x} also occur in \code{names(y)} but not in \code{by.y},
+then this \code{data.table} method will add the \code{suffixes} to those column names. As of
+R v3.4.3, the \code{data.frame} method will not (leading to duplicate column names in the result) but a patch has
+been proposed (see thread \href{http://r.789695.n4.nabble.com/Duplicate-column-names-created-by-base-merge-when-by-x-has-the-same-name-as-a-column-in-y-td4748345.html}{here})
+which is looking likely to be accepted for a future version of R.
 }
 
 \value{