Index: all.equal.R
===================================================================
RCS file: /home/cvsroot/dtk/Splus/patches/all.equal.R,v
retrieving revision 1.1.1.1
retrieving revision 1.4
diff -u -r1.1.1.1 -r1.4
--- all.equal.R	1 Oct 2005 06:51:06 -0000	1.1.1.1
+++ all.equal.R	1 Oct 2005 13:10:25 -0000	1.4
@@ -1,4 +1,75 @@
-all.equal <- function(target, current, ...) UseMethod("all.equal")
+#
+# This is a copy of "src/library/base/R/all.equal.R" from
+# "R-beta_2005-09-24_r35666.tar.gz", plus our modifications.  (The
+# all.equal.R in that tarball seems to be unchanged at least as far
+# back as R 2.1.0.)
+#
+# Further detail is in the comments in each function, but basically,
+# all modifications here involve either of two sorts of improvements:
+#
+# 1. Check names!  Stock R all.equal() (unlike S-Plus) ignores names
+#    completely on some objects.  I consider this bogus, if the names
+#    are different, the object is NOT "the same".
+#
+# 2. When the object is different, return more output to help the user
+#    understand just WHAT is different.
+#
+# Note: Here in our patches package, we purposely CVS import and than
+# override ALL the base all.equal() methods, NOT just the ones we're
+# actually modifying.  At first I tried only overriding some of them,
+# but in that case, even though package:patches was earlier on the
+# search path than package:base, base methods appeared to
+# preferentially call the original base versions, rather than the
+# patches versions that I wanted.  So, big hammer it, override
+# everything - which will probably make it easier to contribute these
+# improvements back to stock R anyway.
+#
+# --atp@piskorski.com, 2005/10/01 02:29 EDT
+#
+# $Id: all.equal.R,v 1.4 2005/10/01 13:10:25 andy Exp $
+
+
+# In S-Plus, all.equal() prefers to index objects by name, while in
+# stock R, it prefers to index by position.  IMO, *NEITHER* of those
+# behaviors are fully correct.  What we really want is to compare
+# things BOTH by name and by position.
+#
+# Here's ONE example of the effect of these R patches:
+#
+## S-Plus 6.2, no patches to all.equal():
+#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F))
+#[1] "Names: 4 string mismatches"                        
+#[2] "Components not in target: b, y"                    
+#[3] "Components not in current: a, x"                   
+#[4] "Component foo: Mean relative difference: 0.8333333"
+#[5] "Component NA: Mean relative difference: 1"         
+#
+## R 2.1.0, no patches to all.equal():
+#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F))
+#[1] "Names: 4 string mismatches"                       
+#[2] "Component 1: Mean relative  difference: 0.5"      
+#[3] "Component 3: Mean relative  difference: 0.3333333"
+#[4] "Component 4: Mean relative  difference: 6"        
+#[5] "Component 5: Mean relative  difference: 0.9761905"
+#[6] "Component 6: Mean relative  difference: 1"        
+#
+## R 2.1.0 with our patches here:
+#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F))
+# [1] "Names: 4 string mismatches"                         
+# [2] "Components not in target: b, y"                     
+# [3] "Components not in current: a, x"                    
+# [4] "Component foo: Mean relative  difference: 0.8333333"
+# [5] "Component NA: Mean relative  difference: 1"         
+# [6] "Component 1: Mean relative  difference: 0.5"        
+# [7] "Component 3: Mean relative  difference: 0.3333333"  
+# [8] "Component 4: Mean relative  difference: 6"          
+# [9] "Component 5: Mean relative  difference: 0.9761905"  
+#[10] "Component 6: Mean relative  difference: 1"          
+
+
+#all.equal.original.fcn <- get("all.equal" ,pos="package:base")
+all.equal <- function(target, current, ... ,debug.p=FALSE) UseMethod("all.equal")
+
 
 ## NO:  is.*(x) should be like S4  is(x, *) ! -- use  isTRUE(all.equal(*))
 ## is.all.equal <- function(target, current, ...)
@@ -32,23 +103,32 @@
 function(target, current, tolerance = .Machine$double.eps ^ .5,
          scale=NULL, ...)
 {
-    if(data.class(target) != data.class(current))
-        return(paste("target is ", data.class(target), ", current is ",
-		       data.class(current), sep = ""))
+    msg <- attr.all.equal(target, current ,...)
+    if(data.class(target) != data.class(current)) {
+        msg <- c(msg ,paste("target is ", data.class(target), ", current is ",
+                            data.class(current), sep = ""))
+        return(msg)
+    }
+
     lt <- length(target)
     lc <- length(current)
     cplx <- is.complex(target)
-    if(lt != lc)
-	return(paste(if(cplx)"Complex" else "Numeric",
-                     ": lengths (", lt, ", ", lc, ") differ", sep = ""))
+    if(lt != lc) {
+        msg <- c(msg ,paste(if(cplx)"Complex" else "Numeric",
+                            ": lengths (", lt, ", ", lc, ") differ", sep = ""))
+	return(msg)
+    }
     target <- as.vector(target)
     current <- as.vector(current)
     out <- is.na(target)
-    if(any(out != is.na(current)))
-	return(paste("`is.NA' value mismatches:", sum(is.na(current)),
-		     "in current,", sum(out), " in target"))
+    if(any(out != is.na(current))) {
+        msg <- c(msg ,paste("`is.NA' value mismatches:", sum(is.na(current)),
+                            "in current,", sum(out), " in target"))
+	return(msg)
+    }
     out <- out | target == current
-    if(all(out)) return(TRUE)
+    if(all(out)) { if (is.null(msg)) return(TRUE) else return(msg) }
+
     target <- target[!out]
     current <- current[!out]
     xy <- mean((if(cplx)Mod else abs)(target - current))
@@ -63,29 +143,37 @@
 	    xy <- xy/scale
 	    "scaled"
 	}
+
     if(is.na(xy) || xy > tolerance)
-	paste("Mean", what, if(cplx)"Mod", "difference:", format(xy)) else TRUE
+       msg <- c(msg ,paste("Mean", what, if(cplx)"Mod", "difference:", format(xy)))
+
+    if(is.null(msg)) TRUE else msg
 }
 
 all.equal.character <- function(target, current, ...)
 {
-    if(data.class(target) != data.class(current))
-        return(paste("target is ", data.class(target), ", current is ",
-		       data.class(current), sep = ""))
+    msg <- attr.all.equal(target, current ,...)
+    if(data.class(target) != data.class(current)) {
+        msg <- c(msg ,paste("target is ", data.class(target), ", current is ",
+                            data.class(current), sep = ""))
+        return(msg)
+    }
     lt <- length(target)
     lc <- length(current)
     if(lt != lc) {
-	msg <- paste("Lengths (", lt, ", ", lc,
+	msg <- c(msg ,paste("Lengths (", lt, ", ", lc,
 		     ") differ (string compare on first ", ll <- min(lt, lc),
-		     ")", sep = "")
+		     ")", sep = ""))
 	ll <- seq(length = ll)
 	target <- target[ll]
 	current <- current[ll]
-    } else msg <- NULL
+    }
     nas <- is.na(target)
-    if (any(nas != is.na(current)))
-        return(paste("`is.NA' value mismatches:", sum(is.na(current)),
+    if (any(nas != is.na(current))) {
+        msg <- c(msg ,paste("`is.NA' value mismatches:", sum(is.na(current)),
                      "in current,", sum(nas), " in target"))
+        return(msg)
+    }
     ne <- !nas & (target != current)
     if(!any(ne) && is.null(msg)) TRUE
     else if(any(ne)) c(msg, paste(sum(ne), "string mismatches"))
@@ -141,76 +229,205 @@
     if(is.null(msg)) TRUE else msg
 }
 
-all.equal.list <- function(target, current, ...)
-{
-    msg <- attr.all.equal(target, current, ...)
-#    nt <- names(target)
-    nc <- names(current)
-    iseq <-
-        ## <FIXME>
-        ## Commenting this eliminates PR#674, and assumes that lists are
-        ## regarded as generic vectors, so that they are equal iff they
-        ## have identical names attributes and all components are equal.
-        ## if(length(nt) && length(nc)) {
-        ##     if(any(not.in <- (c.in.t <- match(nc, nt, 0)) == 0))
-        ## 	msg <- c(msg, paste("Components not in target:",
-        ## 			    paste(nc[not.in], collapse = ", ")))
-        ##     if(any(not.in <- match(nt, nc, 0) == 0))
-        ## 	msg <- c(msg, paste("Components not in current:",
-        ## 			    paste(nt[not.in], collapse = ", ")))
-        ##     nt[c.in.t]
-        ## } else
-        ## </FIXME>
-        if(length(target) == length(current)) {
-	    seq(along = target)
-	} else {
-	    nc <- min(length(target), length(current))
-	    msg <- c(msg, paste("Length mismatch: comparison on first",
-				nc, "components"))
-	    seq(length = nc)
-	}
-    for(i in iseq) {
-	mi <- all.equal(target[[i]], current[[i]], ...)
-	if(is.character(mi))
-	    msg <- c(msg, paste("Component ", i, ": ", mi, sep=""))
-    }
-    if(is.null(msg)) TRUE else msg
+all.equal.list <- function
+(target, current, ..., by.name="auto", by.pos=TRUE, debug.p=FALSE) {
+   # This is copied from "R-beta_2005-09-24_r35666.tar.gz" (which
+   # seems to be unchangd since at least R 2.1.0) and then
+   # modified.
+   #
+   # The stock implementation checked names (and other attributes)
+   # on the list itself but NOT on the components of the list!  We
+   # fix that below.  Furthermore, when reporting differences, it
+   # is much more helpful to report the name-indexed as well as
+   # position-indexed differences, so do that too.
+   #
+   # See also:
+   #   http://r-bugs.biostat.ku.dk/cgi-bin/R/Language-fixed?id=674
+   #   https://stat.ethz.ch/pipermail/r-devel/2000-October/thread.html#21323
+   #
+   # --atp@piskorski.com, 2005/09/27 20:32 EDT
+
+   # - by.name:  When set to "auto" (and by.pos=T), we display the
+   #   by.name=T messages if and only if the by.pos=T checks found
+   #   that one or more names differ.  This is particularly useful for
+   #   data frames, as it immediately disambiguates the "columns are
+   #   just in different orders" vs. "columns of the same name really
+   #   do have different values" cases.
+
+   msg <- attr.all.equal(target, current, ... ,debug.p=debug.p)
+
+   recurse <- function(by.which ,show.other.p) {
+      msg <- c()
+      if (by.which == "name") {
+         by.name <- TRUE ; by.pos <- FALSE
+      } else {
+         by.pos <- TRUE
+         if (by.name != "auto") by.name <- FALSE
+      }
+
+      for (i in iseq) {
+         other.str <- ""
+         mi <- all.equal(target[[i]], current[[i]], ...
+                         ,by.name=by.name ,by.pos=by.pos ,debug.p=debug.p)
+         if (is.character(mi)) {
+            names.differ.p <- F
+            if (by.which == "pos") {
+               name.c <- nc[[i]] ; name.t <- nt[[i]]
+               tmp <- (name.c == name.t)
+               if (length(name.c)==0 && length(name.t)==0) {
+                  # No names at all, show nothing.
+               } else if (length(tmp) && !is.na(tmp) && tmp) {
+                  # Names are the same, only show one:
+                  other.str <- paste(" (" ,name.c ,")" ,sep="")
+               } else {
+                  # Current and Target names differ, show both:
+                  other.str <- paste(" (" ,name.c ," / " ,name.t ,")" ,sep="")
+                  names.differ.p <- T
+               }
+            }
+            msg <- c(msg, paste("Component ", i, other.str, ": ", mi, sep=""))
+
+            if (by.which == "pos" && names.differ.p && by.name == "auto") {
+               tmp <- which(name.c == names(target))
+               if (length(tmp)==0) {
+                  ## This is redundant with the "names not in Target:"
+                  ## message we already printed out:
+                  #msg <- c(msg, paste("Component ", name.c, ": ", "Not in Target.", sep=""))
+               } else {
+                  if (length(tmp) > 1) {
+                     msg <- c(msg, paste("Warning:" ,length(tmp) ,"components in Target w/ name:" ,name.c))
+                     # The code below will check only the first named component:
+                  }
+                  mi <- all.equal(target[[name.c]] ,current[[name.c]] ,...
+                                  ,by.name=TRUE ,by.pos=FALSE ,debug.p=debug.p)
+                  if (is.character(mi))
+                     msg <- c(msg, paste("Component ", name.c, ": ", mi, sep=""))
+                  else
+                     msg <- c(msg, paste("Component ", name.c, ": ", "[same]", sep=""))
+               }
+            }
+         }
+      }
+      return(msg)
+   }
+
+   if (by.name==FALSE && !by.pos)
+      stop("Cannot have both by.pos and by.name False!")
+   if (!is.logical(by.name) && by.name != "auto")
+      stop(paste("Invalid value for by.name:" ,by.name))
+   nt <- names(target) ; nc <- names(current)
+
+   if (by.name == TRUE && length(nt) > 0 && length(nc) > 0) {
+      ## These "Components not in" messages are redundant with the
+      ## "names not in" messages we've already printed out:
+      ## --atp@piskorski.com, 2005/10/01 08:05 EDT
+      if(any(not.in <- (c.in.t <- match(nc, nt, 0)) == 0)) {
+         #msg <- c(msg, paste("Components not in target:", paste(nc[not.in], collapse = ", ")))
+      }
+      #if(any(not.in <- match(nt, nc, 0) == 0)) {
+      #   msg <- c(msg, paste("Components not in current:", paste(nt[not.in], collapse = ", ")))
+      #}
+      iseq <- nt[c.in.t]
+      msg <- c(msg ,recurse(by.which="name"))
+   }
+
+   if (by.pos) {
+      iseq <-
+         (if(length(target) == length(current)) {
+            seq(along = target)
+         } else {
+            tmp <- min(length(target), length(current))
+            msg <- c(msg, paste("Length mismatch: comparison on first",
+                                tmp, "components"))
+            seq(length = tmp)
+         })
+      msg <- c(msg ,recurse(by.which="pos"))
+   }
+
+   if(is.null(msg)) TRUE else msg
 }
 
+
 
-attr.all.equal <- function(target, current, ...)
-{
-    ##--- "all.equal(.)" for attributes ---
-    ##---  Auxiliary in all.equal(.) methods --- return NULL or character()
-    msg <- NULL
-    if(mode(target) != mode(current))
-	msg <- paste("Modes: ", mode(target), ", ", mode(current), sep = "")
-    if(length(target) != length(current))
-	msg <- c(msg, paste("Lengths: ", length(target), ", ",
-			    length(current), sep = ""))
-    ax <- attributes(target)
-    ay <- attributes(current)
-    nx <- names(target)
-    ny <- names(current)
-    if((lx <- length(nx)) | (ly <- length(ny))) {
-	## names() treated now; hence NOT with attributes()
-	ax$names <- ay$names <- NULL
-	if(lx && ly) {
-	    if(is.character(m <- all.equal.character(nx, ny)))
-		msg <- c(msg, paste("Names:", m))
-	} else if(lx)
-	    msg <- c(msg, "names for target but not for current")
-	else msg <- c(msg, "names for current but not for target")
-    }
-    if(length(ax) || length(ay)) {# some (more) attributes
-	## order by names before comparison:
-	nx <- names(ax)
-	ny <- names(ay)
-	if(length(nx))	    ax <- ax[order(nx)]
-	if(length(ny))	    ay <- ay[order(ny)]
-	tt <- all.equal(ax, ay, ...)
-	if(is.character(tt)) msg <- c(msg, paste("Attributes: <", tt, ">"))
-    }
-    msg # NULL or character
+attr.all.equal <- function(target, current, ... ,debug.p=FALSE) {
+   # Based on stock "R-beta_2005-09-24_r35666.tar.gz".  Differences are:
+   # - Also report WHICH names differ.
+   # - Do same checks on row.names and dimnames (if present) as on
+   #   names.
+   # --atp@piskorski.com, 2005/10/01 01:16 EDT
+
+   ##--- "all.equal(.)" for attributes ---
+   ##---  Auxiliary in all.equal(.) methods --- return NULL or character()
+   msg <- NULL
+   if(mode(target) != mode(current))
+      msg <- paste("Modes: ", mode(target), ", ", mode(current), sep = "")
+   if(length(target) != length(current))
+      msg <- c(msg, paste("Lengths: ", length(target), ", ",
+                          length(current), sep = ""))
+   ax <- attributes(target)
+   ay <- attributes(current)
+
+   local.compare.names <- function() {
+      msg <- c()
+      if(lx && ly) {
+         if(is.character(m <- all.equal.character(nx, ny ,debug.p=debug.p))) {
+            msg <- c(msg, paste("Names:", m))
+            not.in <- setdiff(ny ,nx)
+            if (length(not.in) > 0)
+               msg <- c(msg, paste(length(not.in) ,name.type.pretty
+                                   ,"not in Target:"
+                                   ,paste(not.in,collapse=", ")))
+            not.in <- setdiff(nx ,ny)
+            if (length(not.in) > 0)
+               msg <- c(msg, paste(length(not.in) ,name.type.pretty
+                                   ,"not in Current:"
+                                   ,paste(not.in,collapse=", ")))
+         }
+      } else if(lx) {
+         msg <- c(msg ,name.type.pretty ,"for Target but not for Current")
+      } else { msg <- c(msg ,name.type.pretty ,"for Current but not for Target") }
+      return(msg)
+   }
+
+   nx <- names(target) ; ny <- names(current)
+   if((lx <- length(nx)) | (ly <- length(ny))) {
+      ## names() treated now; hence NOT with attributes()
+      ax$names <- ay$names <- NULL
+      name.type.pretty <- "names"
+      msg <- c(msg ,local.compare.names())
+   }
+   if (any(names(ax) == "row.names") && any(names(ay) == "row.names")) {
+      nx <- row.names(target) ; ny <- row.names(current)
+      if((lx <- length(nx)) | (ly <- length(ny))) {
+         ## row.names() treated now; hence NOT with attributes():
+         ax$row.names <- ay$row.names <- NULL
+         name.type.pretty <- "row.names"
+         msg <- c(msg ,local.compare.names())
+      }
+   }
+   if (any(names(ax) == "dimnames") && any(names(ay) == "dimnames")) {
+      # We destructively remove dimnames, so loop from highest to lowest:
+      for (dim.i in length(dimnames(target)):1) {
+         nx <- dimnames(target)[[dim.i]] ; ny <- dimnames(current)[[dim.i]]
+         if((lx <- length(nx)) | (ly <- length(ny))) {
+            ## dimnames()[[dim.i]] treated now; hence NOT with attributes():
+            ax$dimnames[[dim.i]] <- ay$dimnames[[dim.i]] <- NULL
+            name.type.pretty <- paste("dimnames[[" ,dim.i ,"]]" ,sep="")
+            msg <- c(msg ,local.compare.names())
+         }
+      }
+   }
+
+   if(length(ax) || length(ay)) {# some (more) attributes
+      ## order by names before comparison:
+      nx <- names(ax)
+      ny <- names(ay)
+      if(length(nx))	    ax <- ax[order(nx)]
+      if(length(ny))	    ay <- ay[order(ny)]
+      tt <- all.equal(ax, ay, ... ,debug.p=debug.p)
+      if(is.character(tt)) msg <- c(msg, paste("Attributes: <", tt, ">"))
+   }
+
+   msg # NULL or character
 }