Index: all.equal.R =================================================================== RCS file: /home/cvsroot/dtk/Splus/patches/all.equal.R,v retrieving revision 1.1.1.1 retrieving revision 1.4 diff -u -r1.1.1.1 -r1.4 --- all.equal.R 1 Oct 2005 06:51:06 -0000 1.1.1.1 +++ all.equal.R 1 Oct 2005 13:10:25 -0000 1.4 @@ -1,4 +1,75 @@ -all.equal <- function(target, current, ...) UseMethod("all.equal") +# +# This is a copy of "src/library/base/R/all.equal.R" from +# "R-beta_2005-09-24_r35666.tar.gz", plus our modifications. (The +# all.equal.R in that tarball seems to be unchanged at least as far +# back as R 2.1.0.) +# +# Further detail is in the comments in each function, but basically, +# all modifications here involve either of two sorts of improvements: +# +# 1. Check names! Stock R all.equal() (unlike S-Plus) ignores names +# completely on some objects. I consider this bogus, if the names +# are different, the object is NOT "the same". +# +# 2. When the object is different, return more output to help the user +# understand just WHAT is different. +# +# Note: Here in our patches package, we purposely CVS import and than +# override ALL the base all.equal() methods, NOT just the ones we're +# actually modifying. At first I tried only overriding some of them, +# but in that case, even though package:patches was earlier on the +# search path than package:base, base methods appeared to +# preferentially call the original base versions, rather than the +# patches versions that I wanted. So, big hammer it, override +# everything - which will probably make it easier to contribute these +# improvements back to stock R anyway. +# +# --atp@piskorski.com, 2005/10/01 02:29 EDT +# +# $Id: all.equal.R,v 1.4 2005/10/01 13:10:25 andy Exp $ + + +# In S-Plus, all.equal() prefers to index objects by name, while in +# stock R, it prefers to index by position. IMO, *NEITHER* of those +# behaviors are fully correct. What we really want is to compare +# things BOTH by name and by position. +# +# Here's ONE example of the effect of these R patches: +# +## S-Plus 6.2, no patches to all.equal(): +#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F)) +#[1] "Names: 4 string mismatches" +#[2] "Components not in target: b, y" +#[3] "Components not in current: a, x" +#[4] "Component foo: Mean relative difference: 0.8333333" +#[5] "Component NA: Mean relative difference: 1" +# +## R 2.1.0, no patches to all.equal(): +#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F)) +#[1] "Names: 4 string mismatches" +#[2] "Component 1: Mean relative difference: 0.5" +#[3] "Component 3: Mean relative difference: 0.3333333" +#[4] "Component 4: Mean relative difference: 6" +#[5] "Component 5: Mean relative difference: 0.9761905" +#[6] "Component 6: Mean relative difference: 1" +# +## R 2.1.0 with our patches here: +#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F)) +# [1] "Names: 4 string mismatches" +# [2] "Components not in target: b, y" +# [3] "Components not in current: a, x" +# [4] "Component foo: Mean relative difference: 0.8333333" +# [5] "Component NA: Mean relative difference: 1" +# [6] "Component 1: Mean relative difference: 0.5" +# [7] "Component 3: Mean relative difference: 0.3333333" +# [8] "Component 4: Mean relative difference: 6" +# [9] "Component 5: Mean relative difference: 0.9761905" +#[10] "Component 6: Mean relative difference: 1" + + +#all.equal.original.fcn <- get("all.equal" ,pos="package:base") +all.equal <- function(target, current, ... ,debug.p=FALSE) UseMethod("all.equal") + ## NO: is.*(x) should be like S4 is(x, *) ! -- use isTRUE(all.equal(*)) ## is.all.equal <- function(target, current, ...) @@ -32,23 +103,32 @@ function(target, current, tolerance = .Machine$double.eps ^ .5, scale=NULL, ...) { - if(data.class(target) != data.class(current)) - return(paste("target is ", data.class(target), ", current is ", - data.class(current), sep = "")) + msg <- attr.all.equal(target, current ,...) + if(data.class(target) != data.class(current)) { + msg <- c(msg ,paste("target is ", data.class(target), ", current is ", + data.class(current), sep = "")) + return(msg) + } + lt <- length(target) lc <- length(current) cplx <- is.complex(target) - if(lt != lc) - return(paste(if(cplx)"Complex" else "Numeric", - ": lengths (", lt, ", ", lc, ") differ", sep = "")) + if(lt != lc) { + msg <- c(msg ,paste(if(cplx)"Complex" else "Numeric", + ": lengths (", lt, ", ", lc, ") differ", sep = "")) + return(msg) + } target <- as.vector(target) current <- as.vector(current) out <- is.na(target) - if(any(out != is.na(current))) - return(paste("`is.NA' value mismatches:", sum(is.na(current)), - "in current,", sum(out), " in target")) + if(any(out != is.na(current))) { + msg <- c(msg ,paste("`is.NA' value mismatches:", sum(is.na(current)), + "in current,", sum(out), " in target")) + return(msg) + } out <- out | target == current - if(all(out)) return(TRUE) + if(all(out)) { if (is.null(msg)) return(TRUE) else return(msg) } + target <- target[!out] current <- current[!out] xy <- mean((if(cplx)Mod else abs)(target - current)) @@ -63,29 +143,37 @@ xy <- xy/scale "scaled" } + if(is.na(xy) || xy > tolerance) - paste("Mean", what, if(cplx)"Mod", "difference:", format(xy)) else TRUE + msg <- c(msg ,paste("Mean", what, if(cplx)"Mod", "difference:", format(xy))) + + if(is.null(msg)) TRUE else msg } all.equal.character <- function(target, current, ...) { - if(data.class(target) != data.class(current)) - return(paste("target is ", data.class(target), ", current is ", - data.class(current), sep = "")) + msg <- attr.all.equal(target, current ,...) + if(data.class(target) != data.class(current)) { + msg <- c(msg ,paste("target is ", data.class(target), ", current is ", + data.class(current), sep = "")) + return(msg) + } lt <- length(target) lc <- length(current) if(lt != lc) { - msg <- paste("Lengths (", lt, ", ", lc, + msg <- c(msg ,paste("Lengths (", lt, ", ", lc, ") differ (string compare on first ", ll <- min(lt, lc), - ")", sep = "") + ")", sep = "")) ll <- seq(length = ll) target <- target[ll] current <- current[ll] - } else msg <- NULL + } nas <- is.na(target) - if (any(nas != is.na(current))) - return(paste("`is.NA' value mismatches:", sum(is.na(current)), + if (any(nas != is.na(current))) { + msg <- c(msg ,paste("`is.NA' value mismatches:", sum(is.na(current)), "in current,", sum(nas), " in target")) + return(msg) + } ne <- !nas & (target != current) if(!any(ne) && is.null(msg)) TRUE else if(any(ne)) c(msg, paste(sum(ne), "string mismatches")) @@ -141,76 +229,205 @@ if(is.null(msg)) TRUE else msg } -all.equal.list <- function(target, current, ...) -{ - msg <- attr.all.equal(target, current, ...) -# nt <- names(target) - nc <- names(current) - iseq <- - ## - ## Commenting this eliminates PR#674, and assumes that lists are - ## regarded as generic vectors, so that they are equal iff they - ## have identical names attributes and all components are equal. - ## if(length(nt) && length(nc)) { - ## if(any(not.in <- (c.in.t <- match(nc, nt, 0)) == 0)) - ## msg <- c(msg, paste("Components not in target:", - ## paste(nc[not.in], collapse = ", "))) - ## if(any(not.in <- match(nt, nc, 0) == 0)) - ## msg <- c(msg, paste("Components not in current:", - ## paste(nt[not.in], collapse = ", "))) - ## nt[c.in.t] - ## } else - ## - if(length(target) == length(current)) { - seq(along = target) - } else { - nc <- min(length(target), length(current)) - msg <- c(msg, paste("Length mismatch: comparison on first", - nc, "components")) - seq(length = nc) - } - for(i in iseq) { - mi <- all.equal(target[[i]], current[[i]], ...) - if(is.character(mi)) - msg <- c(msg, paste("Component ", i, ": ", mi, sep="")) - } - if(is.null(msg)) TRUE else msg +all.equal.list <- function +(target, current, ..., by.name="auto", by.pos=TRUE, debug.p=FALSE) { + # This is copied from "R-beta_2005-09-24_r35666.tar.gz" (which + # seems to be unchangd since at least R 2.1.0) and then + # modified. + # + # The stock implementation checked names (and other attributes) + # on the list itself but NOT on the components of the list! We + # fix that below. Furthermore, when reporting differences, it + # is much more helpful to report the name-indexed as well as + # position-indexed differences, so do that too. + # + # See also: + # http://r-bugs.biostat.ku.dk/cgi-bin/R/Language-fixed?id=674 + # https://stat.ethz.ch/pipermail/r-devel/2000-October/thread.html#21323 + # + # --atp@piskorski.com, 2005/09/27 20:32 EDT + + # - by.name: When set to "auto" (and by.pos=T), we display the + # by.name=T messages if and only if the by.pos=T checks found + # that one or more names differ. This is particularly useful for + # data frames, as it immediately disambiguates the "columns are + # just in different orders" vs. "columns of the same name really + # do have different values" cases. + + msg <- attr.all.equal(target, current, ... ,debug.p=debug.p) + + recurse <- function(by.which ,show.other.p) { + msg <- c() + if (by.which == "name") { + by.name <- TRUE ; by.pos <- FALSE + } else { + by.pos <- TRUE + if (by.name != "auto") by.name <- FALSE + } + + for (i in iseq) { + other.str <- "" + mi <- all.equal(target[[i]], current[[i]], ... + ,by.name=by.name ,by.pos=by.pos ,debug.p=debug.p) + if (is.character(mi)) { + names.differ.p <- F + if (by.which == "pos") { + name.c <- nc[[i]] ; name.t <- nt[[i]] + tmp <- (name.c == name.t) + if (length(name.c)==0 && length(name.t)==0) { + # No names at all, show nothing. + } else if (length(tmp) && !is.na(tmp) && tmp) { + # Names are the same, only show one: + other.str <- paste(" (" ,name.c ,")" ,sep="") + } else { + # Current and Target names differ, show both: + other.str <- paste(" (" ,name.c ," / " ,name.t ,")" ,sep="") + names.differ.p <- T + } + } + msg <- c(msg, paste("Component ", i, other.str, ": ", mi, sep="")) + + if (by.which == "pos" && names.differ.p && by.name == "auto") { + tmp <- which(name.c == names(target)) + if (length(tmp)==0) { + ## This is redundant with the "names not in Target:" + ## message we already printed out: + #msg <- c(msg, paste("Component ", name.c, ": ", "Not in Target.", sep="")) + } else { + if (length(tmp) > 1) { + msg <- c(msg, paste("Warning:" ,length(tmp) ,"components in Target w/ name:" ,name.c)) + # The code below will check only the first named component: + } + mi <- all.equal(target[[name.c]] ,current[[name.c]] ,... + ,by.name=TRUE ,by.pos=FALSE ,debug.p=debug.p) + if (is.character(mi)) + msg <- c(msg, paste("Component ", name.c, ": ", mi, sep="")) + else + msg <- c(msg, paste("Component ", name.c, ": ", "[same]", sep="")) + } + } + } + } + return(msg) + } + + if (by.name==FALSE && !by.pos) + stop("Cannot have both by.pos and by.name False!") + if (!is.logical(by.name) && by.name != "auto") + stop(paste("Invalid value for by.name:" ,by.name)) + nt <- names(target) ; nc <- names(current) + + if (by.name == TRUE && length(nt) > 0 && length(nc) > 0) { + ## These "Components not in" messages are redundant with the + ## "names not in" messages we've already printed out: + ## --atp@piskorski.com, 2005/10/01 08:05 EDT + if(any(not.in <- (c.in.t <- match(nc, nt, 0)) == 0)) { + #msg <- c(msg, paste("Components not in target:", paste(nc[not.in], collapse = ", "))) + } + #if(any(not.in <- match(nt, nc, 0) == 0)) { + # msg <- c(msg, paste("Components not in current:", paste(nt[not.in], collapse = ", "))) + #} + iseq <- nt[c.in.t] + msg <- c(msg ,recurse(by.which="name")) + } + + if (by.pos) { + iseq <- + (if(length(target) == length(current)) { + seq(along = target) + } else { + tmp <- min(length(target), length(current)) + msg <- c(msg, paste("Length mismatch: comparison on first", + tmp, "components")) + seq(length = tmp) + }) + msg <- c(msg ,recurse(by.which="pos")) + } + + if(is.null(msg)) TRUE else msg } + -attr.all.equal <- function(target, current, ...) -{ - ##--- "all.equal(.)" for attributes --- - ##--- Auxiliary in all.equal(.) methods --- return NULL or character() - msg <- NULL - if(mode(target) != mode(current)) - msg <- paste("Modes: ", mode(target), ", ", mode(current), sep = "") - if(length(target) != length(current)) - msg <- c(msg, paste("Lengths: ", length(target), ", ", - length(current), sep = "")) - ax <- attributes(target) - ay <- attributes(current) - nx <- names(target) - ny <- names(current) - if((lx <- length(nx)) | (ly <- length(ny))) { - ## names() treated now; hence NOT with attributes() - ax$names <- ay$names <- NULL - if(lx && ly) { - if(is.character(m <- all.equal.character(nx, ny))) - msg <- c(msg, paste("Names:", m)) - } else if(lx) - msg <- c(msg, "names for target but not for current") - else msg <- c(msg, "names for current but not for target") - } - if(length(ax) || length(ay)) {# some (more) attributes - ## order by names before comparison: - nx <- names(ax) - ny <- names(ay) - if(length(nx)) ax <- ax[order(nx)] - if(length(ny)) ay <- ay[order(ny)] - tt <- all.equal(ax, ay, ...) - if(is.character(tt)) msg <- c(msg, paste("Attributes: <", tt, ">")) - } - msg # NULL or character +attr.all.equal <- function(target, current, ... ,debug.p=FALSE) { + # Based on stock "R-beta_2005-09-24_r35666.tar.gz". Differences are: + # - Also report WHICH names differ. + # - Do same checks on row.names and dimnames (if present) as on + # names. + # --atp@piskorski.com, 2005/10/01 01:16 EDT + + ##--- "all.equal(.)" for attributes --- + ##--- Auxiliary in all.equal(.) methods --- return NULL or character() + msg <- NULL + if(mode(target) != mode(current)) + msg <- paste("Modes: ", mode(target), ", ", mode(current), sep = "") + if(length(target) != length(current)) + msg <- c(msg, paste("Lengths: ", length(target), ", ", + length(current), sep = "")) + ax <- attributes(target) + ay <- attributes(current) + + local.compare.names <- function() { + msg <- c() + if(lx && ly) { + if(is.character(m <- all.equal.character(nx, ny ,debug.p=debug.p))) { + msg <- c(msg, paste("Names:", m)) + not.in <- setdiff(ny ,nx) + if (length(not.in) > 0) + msg <- c(msg, paste(length(not.in) ,name.type.pretty + ,"not in Target:" + ,paste(not.in,collapse=", "))) + not.in <- setdiff(nx ,ny) + if (length(not.in) > 0) + msg <- c(msg, paste(length(not.in) ,name.type.pretty + ,"not in Current:" + ,paste(not.in,collapse=", "))) + } + } else if(lx) { + msg <- c(msg ,name.type.pretty ,"for Target but not for Current") + } else { msg <- c(msg ,name.type.pretty ,"for Current but not for Target") } + return(msg) + } + + nx <- names(target) ; ny <- names(current) + if((lx <- length(nx)) | (ly <- length(ny))) { + ## names() treated now; hence NOT with attributes() + ax$names <- ay$names <- NULL + name.type.pretty <- "names" + msg <- c(msg ,local.compare.names()) + } + if (any(names(ax) == "row.names") && any(names(ay) == "row.names")) { + nx <- row.names(target) ; ny <- row.names(current) + if((lx <- length(nx)) | (ly <- length(ny))) { + ## row.names() treated now; hence NOT with attributes(): + ax$row.names <- ay$row.names <- NULL + name.type.pretty <- "row.names" + msg <- c(msg ,local.compare.names()) + } + } + if (any(names(ax) == "dimnames") && any(names(ay) == "dimnames")) { + # We destructively remove dimnames, so loop from highest to lowest: + for (dim.i in length(dimnames(target)):1) { + nx <- dimnames(target)[[dim.i]] ; ny <- dimnames(current)[[dim.i]] + if((lx <- length(nx)) | (ly <- length(ny))) { + ## dimnames()[[dim.i]] treated now; hence NOT with attributes(): + ax$dimnames[[dim.i]] <- ay$dimnames[[dim.i]] <- NULL + name.type.pretty <- paste("dimnames[[" ,dim.i ,"]]" ,sep="") + msg <- c(msg ,local.compare.names()) + } + } + } + + if(length(ax) || length(ay)) {# some (more) attributes + ## order by names before comparison: + nx <- names(ax) + ny <- names(ay) + if(length(nx)) ax <- ax[order(nx)] + if(length(ny)) ay <- ay[order(ny)] + tt <- all.equal(ax, ay, ... ,debug.p=debug.p) + if(is.character(tt)) msg <- c(msg, paste("Attributes: <", tt, ">")) + } + + msg # NULL or character }