r/R/dagitty.R

#' @import V8 jsonlite
#' @importFrom boot boot
#' @importFrom MASS ginv
#' @importFrom grDevices dev.size
#' @importFrom methods is
#' @importFrom utils tail combn capture.output
#' @importFrom stats as.formula coef confint cov cov2cor cor lm pnorm pchisq qnorm quantile runif loess sd weighted.mean complete.cases rbinom cancor cor.test
#' @importFrom graphics abline arrows axis lines par plot plot.new segments strheight strwidth text xspline
NULL

#' Get Graph Type
#'
#' @param x the input graph.
#' @examples
#' graphType( "mag{ x<-> y }" ) == "mag"
#' @export
#'
graphType <- function( x ){
	x <- as.dagitty( x )
	xv <- .getJSVar()
	r <- NULL
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("global.",
			xv,".getType()") )
		r <- .jsget( xv )
	}, 
	error=function(e) stop(e),
	finally={.deleteJSVar(xv)})
	r
}
'graphType<-' <- function( x, value=c("dag","mag","pdag","pag") ){
	value <- match.arg(value)
	x <- as.dagitty( x )
	xv <- .getJSVar()
	r <- NULL
	tryCatch({
		.jsassigngraph( xv, x )
		.jseval( paste0("global.",xv,".setType('",value,"')") )
		r <- .jsgetgraph( xv )
	}, 
	error=function(e) stop(e),
	finally={.deleteJSVar(xv)})
	r
}

#' Get Bundled Examples
#'
#' Provides access to the builtin examples of the dagitty website.
#'
#' @param x name of the example, or part thereof. Supported values are:
#' \describe{
#'  \item{"M-bias"}{ the M-bias graph.}
#'  \item{"confounding"}{ an extended confounding triangle.}
#'  \item{"mediator"}{ a small model with a mediator.}
#'  \item{"paths"}{ a graph with many variables but few paths}
#'  \item{"Sebastiani"}{ a small part of a genetics study (Sebastiani et al., 2005)}
#'  \item{"Polzer"}{ DAG from a dentistry study (Polzer et al., 2012)}
#'  \item{"Schipf"}{ DAG from a study on diabetes (Schipf et al., 2010)}
#'  \item{"Shrier"}{ DAG from a classic sports medicine example (Shrier & Platt, 2008)}
#'  \item{"Thoemmes"}{ DAG with unobserved variables 
#'	(communicated by Felix Thoemmes, 2013)}.
#'  \item{"Kampen"}{ DAG from a psychiatry study (van Kampen, 2014)}
#' }
#' @references
#' Sabine Schipf, Robin Haring, Nele Friedrich, Matthias Nauck, Katharina Lau,
#' Dietrich Alte, Andreas Stang, Henry Voelzke, and Henri Wallaschofski (2011),
#' Low total testosterone is associated with increased risk of incident
#' type 2 diabetes mellitus in men: Results from the study of health in
#' pomerania (SHIP). \emph{The Aging Male} \bold{14}(3):168--75.
#'
#' Paola Sebastiani, Marco F. Ramoni, Vikki Nolan, Clinton T. Baldwin, and
#' Martin H. Steinberg (2005), Genetic dissection and prognostic modeling of overt 
#' stroke in sickle cell anemia. \emph{Nature Genetics}, \bold{37}:435--440.
#' 
#' Ian Shrier and Robert W. Platt (2008), 
#' Reducing bias through directed acyclic graphs.
#' \emph{BMC Medical Research Methodology}, \bold{8}(70).
#'
#' Ines Polzer, Christian Schwahn, Henry Voelzke, Torsten Mundt, and Reiner
#' Biffar (2012), The association of tooth loss with all-cause and circulatory
#'  mortality. Is there a benefit of replaced teeth? A systematic review and
#'  meta-analysis. \emph{Clinical Oral Investigations}, \bold{16}(2):333--351.
#'
#' Dirk van Kampen (2014),
#' The SSQ model of schizophrenic prodromal unfolding revised: An
#'  analysis of its causal chains based on the language of directed graphs.
#' \emph{European Psychiatry}, \bold{29}(7):437--48.
#' 
#' @examples
#' g <- getExample("Shrier")
#' plot(g)
#'
#' @export
getExample <- function( x ){
	ct <- .getJSContext()
	xv <- .getJSVar()
	tryCatch({
		.jsassign( xv, x )
		r <- ct$eval( paste0("DagittyR.findExample(global.",xv,")") )
	}, finally={.deleteJSVar(xv)})
	if( r != "undefined" ){
		structure(r,class="dagitty")
	} else {
		stop("Example ",x," could not be found!")
	}
}


#' Implied Covariance Matrix of a Gaussian Graphical Model
#'
#' @inheritParams simulateSEM
#'
#' @export
impliedCovarianceMatrix <- function( x, b.default=NULL, b.lower=-.6, b.upper=.6, eps=1, standardized=TRUE ){
	if( !requireNamespace( "MASS", quietly=TRUE ) ){
		stop("This function requires the 'MASS' package!")
	}
	.supportsTypes( x, "dag" )
	x <- as.dagitty( x )
	e <- .edgeAttributes( x, "beta" )
	e$a <- as.double(as.character(e$a))
	b.not.set <- is.na(e$a)
	if( is.null( b.default ) ){
		e$a[b.not.set] <- runif(sum(b.not.set),b.lower,b.upper)
	} else {
		e$a[b.not.set] <- b.default
	}
	ovars <- names(x)
	nV <- length(ovars)
	nL <- sum(e$e=="<->")
	if( nrow(e) > 0 ){
		vars <- paste0("v",ovars)
		if( nL > 0 ){
			lats <- paste0("l",seq_len(nL))
		} else {
			lats <- c()
		}
		Beta <- matrix( 0, nrow=nV+nL, ncol=nV+nL )
		rownames(Beta) <- colnames(Beta) <- c(vars,lats)
		cL <- 1
		for( i in seq_len( nrow(e) ) ){
			b <- e$a[i]
			if( e$e[i] == "<->" ){
				lV <- paste0("l",cL) 
				lb <- sqrt(abs(b))
				Beta[lV,paste0("v",e$v[i])] <- lb
				if( b < 0 ){
					Beta[lV,paste0("v",e$w[i])] <- -lb
				} else {
					Beta[lV,paste0("v",e$w[i])] <- lb
				}
				cL <- cL + 1
			} else if( e$e[i] == "->" ){
				Beta[paste0("v",e$v[i]),paste0("v",e$w[i])] <- b
			}
		}
		L <- (diag( 1, nV+nL ) - Beta)
		Li <- MASS::ginv( L )
		if( standardized == TRUE ){
			Phi <- MASS::ginv( t(Li)^2 ) %*% rep(eps,nrow(Beta))
			Phi <- diag( c(Phi), nV+nL )
		} else {
			Phi <- diag( eps, nV+nL )
		}
		Sigma <- t(Li) %*% Phi %*% Li
	} else {
		Sigma <- diag(1,nV+nL)
	}
	Sigma <- Sigma[1:nV,1:nV]
	colnames( Sigma ) <- rownames( Sigma ) <- ovars
	Sigma[setdiff(ovars,latents(x)),setdiff(ovars,latents(x))]
}

#' Ancestral Relations
#'
#' Retrieve the names of all variables in a given graph that are in the specified 
#' ancestral relationship to the input variable \code{v}.
#'
#' @param x the input graph, of any type.
#' @param v name(s) of variable(s).
#' @param proper logical. By default (\code{proper=FALSE}), the \code{descendants} or \code{ancestors}
#' of a variable include the variable itself. For (\code{proper=TRUE}), the variable itself 
#' is not included.
#'
#' \code{descendants(x,v)} retrieves variables that are are reachable from \code{v} via 
#' a directed path.
#'
#' \code{ancestors(x,v)} retrieves variables from which \code{v} is reachable via a 
#' directed path.
#'
#' \code{children(x,v)} finds all variables \code{w} connected to \code{v} 
#' by an edge \eqn{v} -> \eqn{w}.
#'
#' \code{parents(x,v)} finds all variables \code{w} connected to \code{v} 
#' by an edge \eqn{w} -> \eqn{v}.
#'
#' \code{markovBlanket(x,v}) returns \code{x}'s parents, its children, and all other
#' parents of its children. The Markov blanket always renders \code{x} independent
#' of all other nodes in the graph.
#'
#' By convention, \code{descendants(x,v)} and \code{ancestors(x,v)} include 
#' \code{v} but \code{children(x,v)} and \code{parents(x,v)} do not. 
#' 
#' @name AncestralRelations
#'
#' @examples
#' g <- dagitty("graph{ a <-> x -> b ; c -- x <- d }")
#' # Includes "x"
#' descendants(g,"x")
#' # Does not include "x"
#' descendants(g,"x",TRUE)
#' parents(g,"x")
#' spouses(g,"x") 
#' 
NULL

#' @rdname AncestralRelations
#' @export
descendants <- function( x, v, proper=FALSE ){
	r <- .kins( x, v, "descendants" )
	if( proper ){
		setdiff( r, v )
	} else {
		r
	}
}

#' @rdname AncestralRelations
#' @export
ancestors <- function( x, v, proper=FALSE ){
	r <- .kins( x, v, "ancestors" )
	if( proper ){
		setdiff( r, v )
	} else {
		r
	}
}

#' @rdname AncestralRelations
#' @export
children <- function( x, v ){
	.kins( x, v, "children" )
}

#' @rdname AncestralRelations
#' @export
parents <- function( x, v ){
	.kins( x, v, "parents" )
}

#' @rdname AncestralRelations
#' @export
neighbours <- function( x, v ){
	.kins( x, v, "neighbours" )
}

#' @rdname AncestralRelations
#' @export
spouses <- function( x, v ){
	.kins( x, v, "spouses" )
}

#' @rdname AncestralRelations
#' @export
adjacentNodes <- function( x, v ){
	.kins( x, v, "adjacentNodes" )
}

#' @rdname AncestralRelations
#' @export
markovBlanket <- function( x, v ){
	setdiff( union( union( parents( x, v ), children( x, v ) ),
        	parents( x, children( x, v ) ) ), v )
}

#' Orient Edges in PDAG.
#'
#' Orients as many edges as possible in a  partially directed acyclic graph (PDAG)
#'  by converting induced subgraphs
#' X -> Y -- Z to X -> Y -> Z.
#'
#' @param x the input graph, a PDAG.
#' 
#' @examples
#' orientPDAG( "pdag { x -> y -- z }" )
#' @export
#'
orientPDAG <- function( x ){
	.supportsTypes( x, "pdag" )
	.graphTransformer( x, "cgToRcg" )
}

#' Convert DAG to MAG.
#'
#' Given a DAG, possibly with latent variables, construct a MAG that represents its
#' marginal independence model.
#'
#' @param x the input graph, a DAG
#' 
#' @examples
#' toMAG( "dag { ParentalSmoking->Smoking 
#'   { Profession [latent] } -> {Income->Smoking}
#'   Genotype -> {Smoking->LungCancer} }")
#' @export
#'
toMAG <- function( x ){
	.supportsTypes( x, "dag" )
	.graphTransformer( x, "dagToMag" )
}

#' Generating Equivalent Models
#' 
#' \code{equivalenceClass(x)} generates a complete partially directed acyclic graph 
#' (CPDAG) from an input DAG \code{x}. The CPDAG represents all graphs that are Markov 
#' equivalent to \code{x}: undirected
#' edges in the CPDAG can be oriented either way, as long as this does not create a cycle
#' or a new v-structure (a sugraph a -> m <- b, where a and b are not adjacent).
#' 
#' \code{equivalentDAGs(x,n)} enumerates at most \code{n} DAGs that are Markov equivalent
#' to the input DAG or CPDAG \code{x}.
#'
#' @param x the input graph, a DAG (or CPDAG for \code{equivalentDAGs}).
#' @param n maximal number of returned graphs.
#' @name EquivalentModels
#' @examples
#' # How many equivalent DAGs are there for the sports DAG example?
#' g <- getExample("Shrier")
#' length(equivalentDAGs(g))
#' # Plot all equivalent DAGs
#' par( mfrow=c(2,3) )
#' lapply( equivalentDAGs(g), plot )
#' # How many edges can be reversed without changing the equivalence class?
#' sum(edges(equivalenceClass(g))$e == "--")
NULL

#' @rdname EquivalentModels
#' @export
equivalenceClass <- function( x ){
	x <- as.dagitty( x )
	.supportsTypes( x, "dag" )
	.graphTransformer( x, "dagToCpdag" )
}

#' @rdname EquivalentModels
#' @export
equivalentDAGs <- function( x, n=100 ){
	x <- as.dagitty(x)
	.supportsTypes( x, c("dag","pdag") )
	xv <- .getJSVar()
	r <- NULL
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("DAGitty.GraphTransformer.markovEquivalentDags(global.",
			xv,",",n,").map(function(x){return x.toString()})") )
		r <- .jsget( xv )
	}, 
	error=function(e) stop(e),
	finally={.deleteJSVar(xv)})
	lapply( r, dagitty )
}

#' Moral Graph
#'
#' Graph obtained from \code{x} by (1) \dQuote{marrying} (inserting an undirected
#' ede between) all nodes that have common children, and then replacing all edges
#' by undirected edges. If \code{x} contains bidirected edges, then all sets of 
#' nodes connected by a path containing only bidirected edges are treated like a 
#' single node (see Examples).
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#'
#' @examples
#' # returns a complete graph
#' moralize( "dag{ x->m<-y }" )
#' # also returns a complete graph
#' moralize( "dag{ x -> m1 <-> m2 <-> m3 <-> m4 <- y }" )
#'
#' @export
moralize <- function( x ){
	x <- as.dagitty( x )
	.supportsTypes( x, c("dag","mag","pdag") )
	.graphTransformer( x, "moralGraph" )
}

#' Extract Structural Part from Structural Equation Model
#'
#' Removes all observed variables from the input graph.
#'
#' @param x the input graph, a DAG.
#'
#' @details Assumes that x is a graph where there are edges between 
#' the latent variables, between the observed variables, and 
#' from latent to observed variables, but no edge between
#' a latent L and an observed X may have an arrowhead at L.
#
#' @export
structuralPart <- function( x ){
	x <- as.dagitty( x )
	.supportsTypes( x, c("dag","digraph") )
	.graphTransformer( x, "structuralPart" )
}

#' Extract Measurement Part from Structural Equation Model
#'
#' Removes all edges between latent variables, then removes any
#' latent variables without adjacent edges, then returns the graph.
#'
#' @param x the input graph, a DAG.
#'
#' @details Assumes that x is a graph where there are edges between 
#' the latent variables, between the observed variables, and 
#' from latent to observed variables, but no edge between
#' a latent L and an observed X may have an arrowhead at L.
#'
#' @export
measurementPart <- function( x ){
	x <- as.dagitty( x )
	.supportsTypes( x, c("dag","digraph") )
	.graphTransformer( x, "measurementPart" )
}

#' Back-Door Graph
#'
#' Removes every first edge on a proper causal path from \code{x}.
#' If \code{x} is a MAG or PAG, then only \dQuote{visible} directed
#' edges are removed (Zhang, 2008).
#'
#' @param x the input graph, a DAG, MAG, PDAG, or PAG.
#'
#' @references
#' J. Zhang (2008), Causal Reasoning with Ancestral Graphs. 
#' \emph{Journal of Machine Learning Research} 9: 1437-1474.
#' 
#' @examples
#' g <- dagitty( "dag { x <-> m <-> y <- x }" )
#' backDoorGraph( g ) # x->y edge is removed
#' 
#' g <- dagitty( "mag { x <-> m <-> y <- x }" )
#' backDoorGraph( g ) # x->y edge is not removed
#' 
#' g <- dagitty( "mag { x <-> m <-> y <- x <- i }" )
#' backDoorGraph( g ) # x->y edge is removed
#' 
#' @export
backDoorGraph <- function( x ){
	x <- as.dagitty(x)
	.supportsTypes( x, c("dag","mag","pdag","pag") )
	.graphTransformer( x, "backDoorGraph" )
}

#' Ancestor Graph
#'
#' Creates the induced subgraph containing only the vertices
#' in \code{v}, their ancestors, and the edges between them. All
#' other vertices and edges are discarded.
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#' @param v variable names.
#'
#' @details If the input graph is a MAG or PDAG, then all *possible* ancestors
#' will be returned (see Examples).
#' 
#' @examples
#' g <- dagitty("dag{ z <- x -> y }")
#' ancestorGraph( g, "z" )
#'
#' g <- dagitty("pdag{ z -- x -> y }")
#' ancestorGraph( g, "y" ) # includes z
#' 
#' @export
ancestorGraph <- function( x, v=NULL ){
	x <- as.dagitty(x)
	.supportsTypes( x, c("dag","mag","pdag") )
	if( is.null(v) ){
		v <- c(exposures(x),outcomes(x),adjustedNodes(x))
	} else {
		.checkAllNames( x, v )
		v <- as.list(v)
	}
	xv <- .getJSVar()
	xv2 <- .getJSVar()
	r <- NULL
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv2, v )
		.jsassign( xv2, .jsp("DagittyR.getVertices(global.",xv,",global.",xv2,")") )
		.jsassign( xv, .jsp("DAGitty.GraphTransformer.ancestorGraph(global.",xv,",global.",xv2,")") )
		r <- .jsgetgraph( xv )
	}, 
	error=function(e) stop(e),
	finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
	r
}

#' Variable Statuses
#'
#' Get or set variables with a given status in a graph. Variables in dagitty graphs can
#' have one of several statuses. Variables with status \emph{exposure} and 
#' \emph{outcome} are important when determining causal effects via the functions 
#' \code{\link{adjustmentSets}} and \code{\link{instrumentalVariables}}. Variables
#' with status \emph{latent} are assumed 
#' to be unobserved variables or latent constructs, which is respected when deriving
#' testable implications of a graph via the functions 
#' \code{\link{impliedConditionalIndependencies}} or \code{\link{vanishingTetrads}}.
#'
#' \code{setVariableStatus} first removes the given status from all variables in the graph
#' that had it, and then sets it on the given variables.
#' For instance, if  \code{status="exposure"}  and \code{value="X"} are given, then
#' \code{X} will be the only exposure in the resulting graph.
#'
#' @param x the input graph, of any type.
#' @param value character vector; names of variables to receive the given status.
#' @param status character, one of "exposure", "outcome" or "latent".
#'
#' @name VariableStatus
#'
#' @examples
#' g <- dagitty("dag{ x<->m<->y<-x }") # m-bias graph
#' exposures(g) <- "x"
#' outcomes(g) <- "y"
#' adjustmentSets(g)
#'
NULL

#' @rdname VariableStatus
#' @export
exposures <- function( x ){
	.nodesWithProperty( x, "source" )
}
#' @rdname VariableStatus
#' @export
'exposures<-' <- function( x, value ){
	setVariableStatus(x, "exposure", value )
}

#' @rdname VariableStatus
#' @export
outcomes <- function( x ){
	.nodesWithProperty( x, "target" )
}
#' @rdname VariableStatus
#' @export
'outcomes<-' <- function( x, value ){
	setVariableStatus(x, "outcome", value )
}

#' @rdname VariableStatus
#' @export
latents <- function( x ){
	.nodesWithProperty( x, "latentNode" )
}
#' @rdname VariableStatus
#' @export
'latents<-' <- function( x, value ){
	setVariableStatus(x, "latent", value )
}

#' @rdname VariableStatus
#' @export
adjustedNodes <- function( x ){
	.nodesWithProperty( x, "adjustedNode" )
}
#' @rdname VariableStatus
#' @export
'adjustedNodes<-' <- function( x, value ){
	setVariableStatus(x, "adjustedNode", value )
}


#' @rdname VariableStatus
#' @export
setVariableStatus <- function( x, status, value ) {
	allowed.statuses <-  c("exposure","outcome","latent","adjustedNode")
	if( !(status %in% allowed.statuses) ){
		stop( "Status must be one of: ", paste(allowed.statuses,collapse=", ") )
	}
	.checkAllNames( x, value )
	xv <- .getJSVar()
	vv <- .getJSVar()
	tryCatch({
		.jsassign( xv, as.character(x) )
		.jsassign( xv, .jsp("DAGitty.GraphParser.parseGuess(global.",xv,")") )
		if( status == "exposure" ){
			jsstatname <- "Source"
		} else if (status == "outcome" ){
			jsstatname <- "Target"
		} else if (status == "latent" ){
			jsstatname <- "LatentNode"
		} else if (status == "adjustedNode" ){
			jsstatname <- "AdjustedNode"
		}
		.jseval( paste0( "global.",xv,".removeAll",jsstatname,"s()" ) )
		for( n in value ){
			.jsassign( vv, n )
			.jseval( paste0( "global.",xv,".add",jsstatname,"(global.",vv,")" ) )
		}
		
		r <- .jsget( paste0( xv,".toString()" ) )
	},finally={ 
		.deleteJSVar(vv)
		.deleteJSVar(xv)
	})
	structure(r,class="dagitty")
}

#' Names of Variables in Graph
#'
#' Extracts the variable names from an input graph. Useful for iterating
#' over all variables.
#'
#' @param x the input graph, of any type.
#' @export
#' @examples
#' ## A "DAG" with Romanian and Swedish variable names. These can be
#' ## input using quotes to overcome the limitations on unquoted identifiers.
#' g <- dagitty( 'digraph {
#'   "coração" [pos="0.297,0.502"]
#'   "hjärta" [pos="0.482,0.387"]
#'   "coração" -> "hjärta"
#' }' )
#' names( g )
names.dagitty <- function( x ){
	ct <- .getJSContext()
	xv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("global.",xv,".vertices.keys()") )
		r <- .jsget(xv)},
	finally={.deleteJSVar(xv)})
	r
}

#' Retrieve Exogenous Variables 
#'
#' Returns the names of all variables that have no directed arrow pointing to them.
#' Note that this does not preclude variables connected to bidirected arrows.
#' 
#' @param x the input graph, of any type.
#' @export
exogenousVariables <- function( x ){
	x <- as.dagitty( x )
	e <- edges( x )
	setdiff( names(x), as.character( e$w[e$e=="->"] ) )
}

#' Plot Coordinates of Variables in Graph
#'
#' The DAGitty syntax allows specification of plot coordinates for each variable in a 
#' graph. This function extracts these plot coordinates from the graph description in a
#' \code{dagitty} object. Note that the coordinate system is undefined, typically one 
#' needs to compute the bounding box before plotting the graph.
#'
#' @param x the input graph, of any type.
#' @param value a list with components \code{x} and \code{y}, 
#' giving relative coordinates for each variable. This format is suitable 
#' for \code{\link{xy.coords}}.
#' 
#' @examples
#' ## Plot localization of each node in the Shrier example
#' plot( coordinates( getExample("Shrier") ) )
#' 
#' ## Define a graph and set coordinates afterwards
#' x <- dagitty('dag{
#'     G <-> H <-> I <-> G
#'     D <- B -> C -> I <- F <- B <- A
#'     H <- E <- C -> G <- D
#' }')
#' coordinates( x ) <-
#'     list( x=c(A=1, B=2, D=3, C=3, F=3, E=4, G=5, H=5, I=5),
#'         y=c(A=0, B=0, D=1, C=0, F=-1, E=0, G=1, H=0, I=-1) )
#' plot( x )
#'
#' @seealso
#' Function \link{graphLayout} for automtically generating layout coordinates, and function
#' \link{plot.dagitty} for plotting graphs.
#'
#' @export
coordinates <- function( x ){
	ct <- .getJSContext()
	xv <- .getJSVar()
	yv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("global.",xv,".getVertices()") )
		.jsassign( yv, .jsp("DagittyR.pluck(global.",xv,",'id')") )
		labels <- .jsget(yv)
		.jsassign( yv, .jsp("DagittyR.pluck(global.",xv,",'layout_pos_x')") )
		rx <- .jsget(yv)
		.jsassign( yv, .jsp("DagittyR.pluck(global.",xv,",'layout_pos_y')") )
		ry <- .jsget(yv)},
	finally={
		.deleteJSVar(xv)
		.deleteJSVar(yv)
	})
	names(rx) <- labels
	names(ry) <- labels
	list( x=rx, y=ry ) 
}

#' @rdname coordinates
#' @export
'coordinates<-' <- function( x, value ){
	xv <- .getJSVar()
	xv2 <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		if( is.null( value ) ){
			for( n in names(x) ){
				.jsassign( xv2, as.character(n) )
				.jseval(.jsp("delete global.",xv,".vertices.get(",xv2,
					")['layout_pos_x']"))
				.jseval(.jsp("delete global.",xv,".vertices.get(",xv2,
					")['layout_pos_y']"))
			}
		} else {
			for( n in intersect( names(value$x), names(x) ) ){
				.jsassign( xv2, as.character(n) )
				.jseval(.jsp("global.",xv,".vertices.get(",xv2,
					").layout_pos_x=",
					as.numeric(value$x[n])))
				.jseval(.jsp("global.",xv,".vertices.get(",xv2,
					").layout_pos_y=",
					as.numeric(value$y[n])))
			}
		}
		.jsassign( xv, .jsp("global.",xv,".toString()") )
		r <- .jsget( xv )
	}, 
	error=function(e) stop(e),
	finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
	structure(r,class="dagitty")
}

#' Canonicalize an Ancestral Graph
#'
#' Takes an input ancestral graph (a graph with directed, bidirected and undirected
#' edges) and converts it to a DAG by replacing every bidirected edge x <-> y with a 
#' substructure x <- L -> y, where L is a latent variable, and every undirected edge
#' x -- y with a substructure x -> S <- y, where S is a selection variable. This function
#' does not check whether the input is actually an ancestral graph.
#' 
#' @param x the input graph, a DAG or MAG.
#' @return A list containing the following components:
#' \describe{
#'  \item{g}{The resulting graph.}
#'  \item{L}{Names of newly inserted latent variables.}
#'  \item{S}{Names of newly inserted selection variables.}
#' }
#' 
#' @examples
#' canonicalize("mag{x<->y--z}") # introduces two new variables
#' @export
canonicalize <- function( x ){
	x <- as.dagitty(x)
	.supportsTypes(x,c("dag","mag"))
	xv <- .getJSVar()
	xv2 <- .getJSVar()
	r <- NULL
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("DAGitty.GraphTransformer.canonicalDag(global.",xv,")") )
		.jsassign( xv2, .jsp("global.",xv,".g.toString()") )
		g <- .jsget( xv2 )
		.jsassign( xv2, .jsp("DagittyR.pluck(",xv,".L,'id')") )
		L <- .jsget( xv2 )
		.jsassign( xv2, .jsp("DagittyR.pluck(",xv,".S,'id')") )
		S <- .jsget( xv2 )	
		r <- list( g=structure(g,class="dagitty"), L=L, S=S )
	}, 
	error=function(e) stop(e),
	finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
	r
}

#' Graph Edges
#' 
#' Extracts edge information from the input graph. 
#'
#' @param x the input graph, of any type.
#' @return a data frame with the following variables:
#' \describe{
#'  \item{v}{ name of the start node.}
#'  \item{w}{ name of the end node. For symmetric edges (bidirected and undirected), the
#'  order of start and end node is arbitrary.}
#'  \item{e}{ type of edge. Can be one of \code{"->"}, \code{"<->"} and \code{"--"}.}
#'  \item{x}{ X coordinate for a control point. If this is not \code{NA}, then the edge
#'  is drawn as an \code{\link{xspline}} through the start point, this control point, 
#'  and the end point. This is especially important for cases where there is more than
#'  one edge between two variables (for instance, both a directed and a bidirected edge).}
#'  \item{y}{ Y coordinate for a control point.}
#' }
#'
#' @examples
#' ## Which kinds of edges are used in the Shrier example?
#' levels( edges( getExample("Shrier") )$e )
#' @export 
edges <- function( x ) UseMethod("edges")

#' @export
edges.character <- function( x ){
	edges( dagitty( x ) )
}

#' @export
edges.dagitty <- function( x ){
	xv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("DagittyR.edge2r(global.",xv,")") )
		r <- .jsget(xv)
	}, finally={.deleteJSVar(xv)})
	as.data.frame(r)
}

#' Test for Graph Class
#' 
#' A function to check whether an object has class \code{dagitty}.
#'
#' @param x object to be tested.
#' 
#' @export
is.dagitty <- function(x) inherits(x,"dagitty")

#' Generate Graph Layout
#'
#' This function generates plot coordinates for each variable in a graph that does not
#' have them already. To this end, the well-known \dQuote{Spring} layout algorithm is
#' used. Note that this is a stochastic algorithm, so the generated layout will be 
#' different every time (which also means that you can try several times until you find
#' a decent layout).
#' 
#' @param x the input graph, of any type.
#' @param method the layout method; currently, only \code{"spring"} is supported.
#' @return the same graph as \code{x} but with layout coordinates added. 
#' 
#' @examples
#' ## Generate a layout for the M-bias graph and plot it
#' plot( graphLayout( dagitty( "dag { X <- U1 -> M <- U2 -> Y } " ) ) )
#' ## Plot larger graph and abbreviate its variable names.
#' plot( getExample("Shrier"), abbreviate.names=TRUE )
#'
#' @export
graphLayout <- function( x, method="spring" ){
	x <- as.dagitty( x )
	if( !(method %in% c("spring")) ){
		stop("Layout method ",method," not supported!")
	}
	xv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		.jseval( paste0("(new DAGitty.GraphLayouter.Spring(global.",xv,")).layout()") )
		.jsassign( xv, .jsp("global.",xv,".toString()") )
		r <- .jsget(xv)
	}, finally={.deleteJSVar(xv)})
	structure( r, class="dagitty" )
}

#' Plot Graph
#'
#' A simple plot method to quickly visualize a graph. This is intended mainly for 
#' simple visualization purposes and not as a full-fledged graph drawing
#' function.
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#' @param abbreviate.names logical. Whether to abbreviate variable names.
#' @param show.coefficients logical. Whether to plot coefficients defined in the graph syntax
#'  on the edges.
#' @param adjust.coefficients numerical. Adjustment for coefficient labels; the distance between 
#'  the edge labels and the midpoint of the edge can be controlled using this paramer. 
#'  Can also be a vector of 2 numbers for separate horizontal and vertical adjustment. NA means
#'  no adjustment (default).
#' @param node.names If not NULL, a named vector or expression list 
#'  to rename the nodes.  
#' @param ... not used.
#' 
#' @details If \code{node.names} is not \code{NULL}, it should be a 
#'  named vector of characters or expressions to use to rename (some of) 
#'  the nodes, e.g. node \code{"X"} could be renamed using \code{expression(X = alpha^2)}.
#'
#' @examples
#'
#' # Showing usage of "node.names"
#' plot(dagitty('{x[pos="0,0"]}->{y[pos="1,0"]}'), node.names=expression(x = alpha^2, y=gamma^2))
#' 
#'
#' @export
plot.dagitty <- function( x,
	abbreviate.names=FALSE, 
	show.coefficients=FALSE,
	adjust.coefficients=NA,
	node.names=NULL,
	... ){	
	parms <- list(...)	
	x <- as.dagitty( x )
	.supportsTypes(x,c("dag","mag","pdag"))
	coords <- coordinates( x )
        if( any( !is.finite( coords$x ) | !is.finite( coords$y ) ) ){
			message("Plot coordinates for graph not supplied! Generating coordinates, see ?coordinates for how to set your own.")
			x <- graphLayout(x)
			coords <- coordinates(x)
        }
	if( abbreviate.names ){
		labels <- abbreviate(names(coords$x))
	} else {
		labels <- names(coords$x)
	}
	if(!is.null(node.names)){
	  names(labels) <- names(coords$x)
	  if(is.null(names(node.names)))
	    stop("'node.names' must be named")
	  if(!all(names(node.names) %in% names(labels)))
	    stop("node(s) not found: ", 
	         paste0("'", setdiff(names(node.names), names(labels)), "'", 
	               collapse = ","))
	  labels[names(node.names)] <- node.names
	  names(labels) <- names(coords$x) # If coerced to expression, names are lost
	}
	omar <- par("mar")
	par(mar=rep(0,4))
	plot.new()
	par(new=TRUE)
	wx <- sapply( labels, 
	              function(s) strwidth(s,units="inches") ) + strwidth("mm",units="inches")
	wy <- sapply( labels, 
	              function(s) strheight(s,units="inches") ) + strheight("\n")
	ppi.x <- dev.size("in")[1] / (max(coords$x)-min(coords$x))
	ppi.y <- dev.size("in")[2] / (max(coords$y)-min(coords$y))
	wx <- wx/ppi.x
	wy <- wy/ppi.y
	if( 'xlim' %in% names(parms) ){
		xlim <- parms$xlim
	} else {
		xlim <- c(min(coords$x-wx/2),max(coords$x+wx/2))
	}

	if( 'ylim' %in% names(parms) ){
		ylim <- parms$ylim
	} else {
		ylim <- c(-max(coords$y+wy/2),-min(coords$y-wy/2))
	}

	plot( NA, xlim=xlim, ylim=ylim, xlab="", ylab="", bty="n",
		xaxt="n", yaxt="n" )
	wx <- sapply( labels, strwidth ) + strwidth("xx") 
	wy <- sapply( labels, strheight ) + strheight("\n")
	asp <- par("pin")[1]/diff(par("usr")[1:2]) /
		(par("pin")[2]/diff(par("usr")[3:4]))
	ex <- edges(x)
	if( show.coefficients ){
		ea <- .edgeAttributes(x,"beta")
		ex <- merge( ex, ea )
	}
	ax1 <- rep(0,nrow(ex))
	ax2 <- rep(0,nrow(ex))
	ay1 <- rep(0,nrow(ex))
	ay2 <- rep(0,nrow(ex))
	axc <- rep(0,nrow(ex))
	ayc <- rep(0,nrow(ex))
	acode <- rep(2,nrow(ex))
	has.control.point <- rep(FALSE,nrow(ex))
	for( i in seq_len(nrow(ex)) ){
		if( ex[i,3] == "<->" ){
			acode[i] <- 3
			has.control.point[i] <- TRUE
		}
		if( ex[i,3] == "--" ){
			acode[i] <- 0
		}
		l1 <- as.character(ex[i,1]); l2 <- as.character(ex[i,2])
		x1 <- coords$x[l1]; y1 <- coords$y[l1]
		x2 <- coords$x[l2]; y2 <- coords$y[l2]
		if( is.na( ex[i,4] ) || is.na( ex[i,5] ) ){
			cp <- .autoControlPoint( x1, y1, x2, y2, asp,
				.2*as.integer( acode[i]==3 ) )
		} else {
			cp <- list(x=ex[i,4],y=ex[i,5])
			has.control.point[i] <- TRUE
		}
		bi1 <- .lineSegBoxIntersect( x1-wx[l1]/2,y1-wy[l1]/2,
			x1+wx[l1]/2,y1+wy[l1]/2, x1, y1, cp$x, cp$y )
		bi2 <- .lineSegBoxIntersect( x2-wx[l2]/2,y2-wy[l2]/2,
			x2+wx[l2]/2,y2+wy[l2]/2, cp$x, cp$y, x2, y2 )
		if( length(bi1) == 2 ){
			x1 <- bi1$x; y1 <- bi1$y
		}
		if( length(bi2) == 2 ){
			x2 <- bi2$x; y2 <- bi2$y
		}
		ax1[i] <- x1; ax2[i] <- x2
		ay1[i] <- y1; ay2[i] <- y2
		axc[i] <- cp$x; ayc[i] <- cp$y
	}
	directed <- acode==2 & !has.control.point
	undirected <- acode==0 & !has.control.point
	arrows( ax1[directed], -ay1[directed], 
		ax2[directed], -ay2[directed], length=0.1, col="gray" )
	segments( ax1[undirected], -ay1[undirected], 
		ax2[undirected], -ay2[undirected], col="black", lwd=2 )
	ic <- which( has.control.point )
	for( i in ic ){
		.arc( ax1[i], -ay1[i], 
			ax2[i], -ay2[i], axc[i], -ayc[i], 
			col=c("gray","black")[1+(acode[i]==0)], 
			code=acode[i], length=0.1, lwd=1+(acode[i]==0) )
	}
	if( show.coefficients ){
		axm <- (ax1+ax2)/2
		aym <- (ay1+ay2)/2
		axm[ic] <- axc[ic]
		aym[ic] <- ayc[ic]
		if( length(adjust.coefficients)==1 ){
			text( axm, -aym, as.character(ex$a), adj=adjust.coefficients*c(sign(ax1-ax2),sign(ay1-ay2)) )
		} else if( length(adjust.coefficients)==2 ){
			text( axm, -aym, as.character(ex$a), adj=adjust.coefficients )		
		}
	}
	text( coords$x, -coords$y, labels )
	par(mar=omar)
}

#' Covariate Adjustment Sets
#'
#' Enumerates sets of covariates that (asymptotically) allow unbiased estimation of causal
#' effects from observational data, assuming that the input causal graph is correct.  
#'
#' @param x the input graph, a DAG, MAG, PDAG, or PAG.
#' @param exposure name(s) of the exposure variable(s). If not given (default), then the 
#'  exposure variables are supposed to be defined in the graph itself.
#' @param outcome name(s) of the outcome variable(s), also taken from the graph if 
#' not given.
#' @param effect which effect is to be identified. If \code{effect="total"}, then the
#' total effect is to be identified, and the adjustment criterion by Perkovic et 
#' al (2015; see also van der Zander et al., 2014), 
#' an extension of Pearl's back-door criterion, is used. Otherwise, if 
#' \code{effect="direct"}, then the average direct effect is to be identified, and Pearl's
#' single-door criterion is used (Pearl, 2009). In a structural equation model (Gaussian
#' graphical model), direct effects are simply the path coefficients.
#' @param type which type of adjustment set(s) to compute. If \code{type="minimal"},
#' then only minimal sufficient adjustment sets are returned (default). For 
#' \code{type="all"}, all valid adjustment sets are returned. For \code{type="canonical"},
#' a single adjustment set is returned that consists of all (possible) ancestors
#' of exposures and outcomes, minus (possible) descendants of nodes on proper causal
#' paths. This canonical adjustment set is always valid if any valid set exists
#' at all.
#' @param max.results integer. The listing of adjustment set is stopped once
#' this many results have been found. Use \code{Inf} to generate them all. 
#' This only applys when \code{type="minimal"}.
#'
#' @details
#' If the input graph is a MAG or PAG, then it must not contain any undirected
#' edges (=hidden selection variables).
#'
#' @references
#' J. Pearl (2009), Causality: Models, Reasoning and Inference. 
#' Cambridge University Press.
#'
#' B. van der Zander, M. Liskiewicz and J. Textor (2014),
#' Constructing separators and adjustment sets in ancestral graphs.
#' In \emph{Proceedings of UAI 2014.}
#' 
#' E. Perkovic, J. Textor, M. Kalisch and M. H. Maathuis (2015), A
#' Complete Generalized Adjustment Criterion. In \emph{Proceedings of UAI
#' 2015.}
#' 
#' @examples
#' # The M-bias graph showing that adjustment for 
#' # pre-treatment covariates is not always valid
#' g <- dagitty( "dag{ x -> y ; x <-> m <-> y }" )
#' adjustmentSets( g, "x", "y" ) # empty set
#' # Generate data where true effect (=path coefficient) is .5
#' set.seed( 123 ); d <- simulateSEM( g, .5, .5 )
#' confint( lm( y ~ x, d ) )["x",] # includes .5
#' confint( lm( y ~ x + m, d ) )["x",] # does not include .5
#'
#' # Adjustment sets can also sometimes be computed for graphs in which not all 
#' # edge directions are known
#' g <- dagitty("pdag { x[e] y[o] a -- {i z b}; {a z i} -> x -> y <- {z b} }")
#' adjustmentSets( g )
#' @export
adjustmentSets <- function( x, exposure=NULL, outcome=NULL, 
	type=c("minimal","canonical","all"), effect=c("total","direct"),
	max.results=Inf ){
	effect <- match.arg( effect )
	type <- match.arg( type )

	if( is.null(exposure) ){
		exposure <- exposures(x)
	}
	if( is.null(outcome) ){
		outcome <- outcomes(x)
	}

	if( effect == "direct" && type != "minimal" ){
		stop("Only minimal adjustment sets are supported for direct effects!")
	}
	x <- as.dagitty( x )
	.supportsTypes( x, c("dag","mag","pdag","pag") )
	.checkAllNames( x, c(exposure, outcome) )
	if( length(exposure) == 0 || length(outcome) == 0 ){
		stop("Both exposure(s) and outcome(s) need to be set!")
	}
	exposures(x) <- exposure
	outcomes(x) <- outcome

	if( type == "minimal" ){
		xv <- .getJSVar()
		command.close <- ")"
		if( is.finite( max.results ) ){
			command.close <- paste(",[],[],",as.integer(max.results),")")
		}
		tryCatch({
			.jsassigngraph( xv, x )
			if( effect=="direct" ){	
				.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.listMsasDirectEffect(global.",xv,command.close) )
			} else {
				.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.listMsasTotalEffect(global.",xv,command.close) )
			}
			.jsassign( xv, .jsp("DagittyR.adj2r(global.",xv,")"))
			r <- structure( .jsget(xv), class="dagitty.sets" )
		},finally={.deleteJSVar(xv)})
	} else if( type == "all" ){
		covariates <- setdiff( names( x ), c( exposure, outcome ) )
		subsets <- (expand.grid( rep( list(0:1),length(covariates)) ))
		r <- lapply( 1:nrow(subsets), function(i){
			Z <- covariates[as.logical(subsets[i,])]
			if( isAdjustmentSet( x, Z ) ){
				Z
		    	} else {
				NA
			}
		})
		non.r <- which(sapply(r,function(x) isTRUE(is.na(x))))
		if( length(non.r) > 0 ){
			r <- r[-non.r]
		} 
		r <- structure(r,class="dagitty.sets")
	} else { # type == "canonical"
		xv <- .getJSVar()
		tryCatch({
			.jsassigngraph( xv, x )
			.jsassign( xv, .jsp("DagittyR.canonicalAdjustment(global.",xv,")"))
			r <- structure(.jsget(xv),class="dagitty.sets")
		},finally={.deleteJSVar(xv)})
	}
	r
}

#' Adjustment Criterion
#' 
#' Test whether a set fulfills the adjustment criterion, that means,
#' it removes all confounding bias when estimating a *total* effect.
#' This is an #' Back-door criterion (Shpitser et al, 2010; van der Zander et al, 
#' 2014; Perkovic et al, 2015) 
#' which is complete in the sense that either a set
#' fulfills this criterion, or it does not remove all confounding bias.
#'
#' @param x the input graph, a DAG, MAG, PDAG, or PAG.
#' @param exposure name(s) of the exposure variable(s). If not given (default), then the 
#'  exposure variables are supposed to be defined in the graph itself.
#' @param outcome name(s) of the outcome variable(s), also taken from the graph if 
#' not given.
#' @param Z vector of variable names.
#'
#' @details
#' If the input graph is a MAG or PAG, then it must not contain any undirected
#' edges (=hidden selection variables).
#'
#' @references
#'
#' E. Perkovic, J. Textor, M. Kalisch and M. H. Maathuis (2015), A
#' Complete Generalized Adjustment Criterion. In \emph{Proceedings of UAI
#' 2015.}
#'
#' I. Shpitser, T. VanderWeele and J. M. Robins (2010), On the
#' validity of covariate adjustment for estimating causal effects. In
#' \emph{Proceedings of UAI 2010.}
#'
#' @export
isAdjustmentSet <- function( x, Z, exposure=NULL, outcome=NULL ){
	x <- as.dagitty( x )
	.supportsTypes( x, c("dag","mag","pdag","pag") )
	xv <- .getJSVar()
	Zv <- .getJSVar()

	if( is.null(exposure) ){
		exposure <- exposures(x)
	}
	if( is.null(outcome) ){
		outcome <- outcomes(x)
	}
	.checkAllNames( x, c(exposure, outcome) )
	if( length(exposure) == 0 || length(outcome) == 0 ){
		stop("Both exposure(s) and outcome(s) need to be set!")
	}
	exposures(x) <- exposure
	outcomes(x) <- outcome

	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( Zv, as.list(Z) )
		.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.isAdjustmentSet(",xv,",",Zv,")") )
		r <- .jsget(xv)
	},finally={
		.deleteJSVar(xv)
		.deleteJSVar(Zv)
	})
	r
}

#' Test for Cycles
#'
#' \code{isAcyclic(x)} returns \code{TRUE} if the given graph does not contain a directed cycle.
#'
#' \code{findCycle(x)} will try to find at least one cycle in x and return it as a list of node names.
#'
#' @param x the input graph, of any graph type.
#'
#' @details These functions will only consider simple directed edges in the
#' given graph.
#'
#' @examples
#' g1 <- dagitty("dag{X -> Y -> Z}")
#' stopifnot( isTRUE(isAcyclic( g1 )) )
#' g2 <- dagitty("dag{X -> Y -> Z -> X}")
#' stopifnot( isTRUE(!isAcyclic( g2 )) )
#' g3 <- dagitty("mag{X -- Y -- Z -- X}")
#' stopifnot( isTRUE(isAcyclic( g3 )) )
#' @export
isAcyclic <- function( x ){
	x <- as.dagitty( x )
	xv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.containsCycle(",xv,")===false") )
		r <- .jsget(xv)
	},finally={
		.deleteJSVar(xv)
	})
	r
}

#' @rdname isAcyclic
#' @export
findCycle <- function( x ){
	x <- as.dagitty( x )
	xv <- .getJSVar()
	xv2 <- .getJSVar()
	xv3 <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		vv <- names( x )
		.jseval( paste0("var cycle = ''; var cc = '';
				for( ",xv2," of ",xv,".getVertices() ){
					",xv,".clearVisited();
					",xv3," = DAGitty.GraphAnalyzer.searchCycleFrom(",xv2,")
					if( typeof ",xv3," != 'undefined' ){
						break
					}
				}
				if( typeof ",xv3," == 'undefined' ){
					",xv," = []
				} else {
					",xv," = ",xv3,"
				}
			") )
		r <- .jsget(xv)
	},finally={
		.deleteJSVar(xv); .deleteJSVar(xv2); .deleteJSVar(xv3)
	})
	if( length(r) > 0 ){
		for( vi in seq_along(r) ){
			if( sum( r == r[vi] ) > 1 ){
				r <- r[vi:length(r)]; break
			}
		}
	}
	r
}

#' Test for Colliders
#'
#' Returns \code{TRUE} if three given variables form a collider in a given graph.
#'
#' @param x the input graph, a DAG.
#' @param u the first endpoint of the putative collider
#' @param v the midpoint of the putative collider
#' @param w the second endpoint of the putative collider
#' @examples
#' g1 <- dagitty("dag{X -> Y -> Z}")
#' stopifnot( isTRUE(!isCollider( g1, "X", "Y", "Z" )) )
#' g2 <- dagitty("dag{X -> Y <- Z }")
#' stopifnot( isTRUE(isCollider( g2, "X", "Y", "Z" )) )
#' @export
isCollider <- function( x, u, v, w ) {
	x <- as.dagitty( x )
	.supportsTypes( x, c("dag") )
	.checkName( x, v )
	v %in% intersect( children( x, u ), children( x, w ) )
}

#' List Implied Conditional Independencies
#'
#' Generates a list of conditional independence statements that must hold in every
#' probability distribution compatible with the given model.
#'
#' @param x the input graph, a DAG, MAG, or PDAG.
#' @param type can be one of "missing.edge", "basis.set", or "all.pairs". With the first, one or 
#' more minimal testable implication (with the smallest possible conditioning set)
#' is returned per missing edge of the graph. With "basis.set", one testable
#' implication is returned per vertex of the graph that has non-descendants other than
#' its parents. Basis sets can be smaller, but they involve higher-dimensional independencies,
#' whereas missing edge sets involve only independencies between two variables at a time.
#' With "all.pairs", the function will return a list of all implied conditional independencies
#' between two variables at a time. Beware, because this can be a very long list and it 
#' may not be feasible to compute this except for small graphs.
#' @param max.results integer. The listing of conditional independencies is stopped once
#' this many results have been found. Use \code{Inf} to generate them all. This applies
#' only when \code{type="missing.edge"} or \code{type="all"}.
#' @examples
#' g <- dagitty( "dag{ x -> m -> y }" )
#' impliedConditionalIndependencies( g ) # one
#' latents( g ) <- c("m")
#' impliedConditionalIndependencies( g ) # none
#' @export
impliedConditionalIndependencies <- function( x, type="missing.edge", max.results=Inf ){
	type <- match.arg( type, c("missing.edge","basis.set", "all.pairs") )
	x <- as.dagitty( x )
	.supportsTypes( x, c("dag","mag","pdag") )
	if( type == "all.pairs" ){
		vars <- names( x )
		if( length(vars) < 2 ){
			return( structure( list(), class="dagitty.cis" ) )
		}
		r <- unlist( combn( vars, 2, FUN=function(v){
			covariates <- setdiff( vars, v )
			if( length(covariates) == 0 ){
				if( dseparated( x, v[1], v[2] ) ){
					return(list(structure(
						list(X=v[1],Y=v[2],Z=list()), 
						class="dagitty.ci")
					))
				} else {
					return(list())
				}
			}
			subsets <- expand.grid( rep( list(c(F,T)),length(covariates)) )
			wh <- which( apply( subsets, 1, 
				function( w ) dseparated( x, v[1], v[2], covariates[w] ) ) )
			if( length(wh) > 0 ){
				lapply( wh, function(i) structure(
					list(X=v[1],Y=v[2],Z=unlist(covariates[unlist(subsets[i,])]) ), class="dagitty.ci") )
			} else {
				list()
			}
		}, simplify=FALSE), recursive=FALSE)
		return( structure(r, class="dagitty.cis" ) )
	}

	xv <- .getJSVar()
	tryCatch({
		.jsassign( xv, as.character(x) )
		.jsassign( xv, .jsp("DAGitty.GraphParser.parseGuess(global.",xv,")") )

		if( type == "missing.edge" ){
			if( is.finite( max.results ) ){
				.jsassign( xv,
					.jsp("DAGitty.GraphAnalyzer.listMinimalImplications(global.",xv,",",
					as.numeric(max.results),")"))
			} else {
				.jsassign( xv, 
					.jsp("DAGitty.GraphAnalyzer.listMinimalImplications(global.",xv,")"))
			}
		} else {
			.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.listBasisImplications(global.",xv,")"))
		}
		.jsassign( xv, .jsp("DagittyR.imp2r(global.",xv,")") )
		r  <- structure( lapply( .jsget(xv), 
				function(x) structure(x,class="dagitty.ci") ), class="dagitty.cis" )
	},finally={.deleteJSVar(xv)})
	r
}

#' Find Instrumental Variables
#'
#' Generates a list of instrumental variables that can be used to infer the total effect
#' of an exposure on an outcome in the presence of latent confounding, under linearity
#' assumptions. 
#'
#' @param x the input graph, a DAG.
#' @param exposure name of the exposure variable. If not given (default), then the 
#' exposure variable is supposed to be defined in the graph itself. Only a single
#' exposure variable and a single outcome variable supported.
#' @param outcome name of the outcome variable, also taken from the graph if not given.
#' Only a single outcome variable is supported.
#'
#' @references
#' B. van der Zander, J. Textor and M. Liskiewicz (2015),
#' Efficiently Finding Conditional Instruments for Causal Inference.
#' In \emph{Proceedings of the 24th International Joint Conference on 
#' Artificial Intelligence (IJCAI 2015)}, pp. 3243-3249. AAAI Press, 2015.
#'
#' @examples
#' # The classic IV model
#' instrumentalVariables( "dag{ i->x->y; x<->y }", "x", "y" )
#' # A conditional instrumental variable
#' instrumentalVariables( "dag{ i->x->y; x<->y ; y<-z->i }", "x", "y" )
#' @export
instrumentalVariables <- function( x, exposure=NULL, outcome=NULL ){
	x <- as.dagitty( x )
	.supportsTypes( x, "dag" )
	.checkAllNames( x, c(exposure, outcome) )
	if( is.null(exposure) ){
		exposure <- exposures(x)
	}
	if( is.null(outcome) ){
		outcome <- outcomes(x)
	}
	if( length(exposure) != 1 || length(outcome) != 1 ){
		stop("Both exposure(s) and outcome(s) need to be set to exactly one variable!")
	}
	exposures(x) <- exposure
	outcomes(x) <- outcome

	xv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.conditionalInstruments(global.",xv,")") )
		.jsassign( xv, .jsp("DagittyR.iv2r(global.",xv,")") )
		r <- structure( .jsget(xv), class="dagitty.ivs" )
	}, finally={.deleteJSVar(xv)})
	r
}

#' List Implied Vanishing Tetrads
#'
#' Interpret the given graph as a structural equation model and list all the
#' vanishing tetrads that it implies.
#'
#' @param x the input graph, a DAG.
#' @param type restrict output to one level of Kenny's tetrad typology.
#' Possible values are "within" (homogeneity within constructs; all four
#' variables have the same parents), "between" (homogeneity between constructs;
#' two pairs of variables each sharing one parent) 
#' and "epistemic" (consistency of epistemic correlations; three variables have
#' the same parent). By default, all tetrads are listed.
#' @return a data frame with four columns, where each row of the form
#' i,j,k,l means that the tetrad Cov(i,j)Cov(k,l) - Cov(i,k)Cov(j,l) vanishes
#' (is equal to 0) according to the model.
#' 
#' @examples
#' # Specify two-factor model with 4 indicators each
#' g <- dagitty("dag{{x1 x2 x3 x4} <- x <-> y -> {y1 y2 y3 y4}}")
#' latents(g) <- c("x","y")
#'
#' # Check how many tetrads are implied
#' nrow(vanishingTetrads(g))
#' # Check how these distribute across the typology
#' nrow(vanishingTetrads(g,"within"))
#' nrow(vanishingTetrads(g,"between"))
#' nrow(vanishingTetrads(g,"epistemic"))
#'
#' @references
#' Kenny, D. A. (1979), Correlation and Causality. Wiley, New York.
#'
#' @export
vanishingTetrads <- function( x, type=NA ){
	x <- as.dagitty( x )

	xv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		if( is.character( type ) ){
			.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.vanishingTetrads(global.",
				xv,",undefined,'",type,"')") )
		} else {
			.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.vanishingTetrads(global.",
				xv,")") )
	
		}
		r <- .jsget(xv)
	}, finally={.deleteJSVar(xv)})

	r
}


#' Parse DAGitty Graph
#'
#' Constructs a \code{dagitty} graph object from a textual description. 
#'
#' @param x character, string describing a graphical model in dagitty syntax.
#' @param layout logical, whether to automatically generate layout coordinates for each
#' variable (see \code{\link{graphLayout}}) 
#' @details
#' The textual syntax for DAGitty graph is based on the dot language of the 
#' graphviz software (\url{https://graphviz.gitlab.io/_pages/doc/info/lang.html}). This is a
#' fairly intuitive syntax -- use the examples below and in the other functions to
#' get you started. An important difference to graphviz is that the DAGitty language
#' supports several types of graphs, which have different semantics. However, many users
#' will mainly focus on DAGs.
#'
#' A DAGitty graph description has the following form:
#'
#' \code{[graph type] '{' [statements] '}'}
#'
#' where \code{[graph type]} is one of 'dag', 'mag', 'pdag', or 'pag' and \code{[statements]}
#' is a list of variables statements and edge statements, which may (optionally) be
#' separated by semicolons. Whitespace, including newlines, has no semantic role.
#'
#' Variable statments look like
#'
#' \code{[variable id] '[' [properties] ']'}
#'
#' For example, the statement
#'
#' \code{x [exposure,pos="1,0"]}
#'
#' declares a variable with ID x that is an exposure variable and has a layout position
#' of 1,0.
#'
#' The edge statement
#'
#' \code{x -> y}
#'
#' declares a directed edge from variable x to variable y. Explicit variable statements
#' are not required for the variables involved in edge statements, unless attributes 
#' such as position or exposure/outcome status need to be set.
#'
#' DAGs (directed acyclic graphs) can contain the following edges: \code{->}, \code{<->}. 
#' Bidirected edges in DAGs are simply shorthands for substructures \code{<- U ->}, 
#' where U is an unobserved variable.
#'
#' MAGs (maximal ancestral graphs) can contain the following edges: \code{->},
#' \code{<->}, \code{--}. 
#' The bidirected and directed edges of MAGs can represent latent confounders, and 
#' the undirected edges represent latent selection variables. 
#' For details, see Richardson and Spirtes (2002).
#'
#' PDAGs (partially directed acyclic graphs) can contain the following edges: \code{->},
#' \code{<->}, \code{--}. 
#' The bidirected edges mean the same thing as in DAGs. The undirected edges represent
#' edges whose direction is not known. Thus, PDAGs are used to represent equivalence
#' classes of DAGs (see also the function \code{\link{equivalenceClass}}).
#'
#' PAGs (partial ancestral graphs) are to MAGs what PDAGs are to DAGs: they represent
#' equivalence classes of MAGs. MAGs can contain the following edges: \code{@-@}, 
#' \code{->}, \code{@->}, \code{--}, \code{@--}
#' (the @ symbols are written as circle marks in most of the literature). For
#' details on PAGs, see Zhang et al (2008). For now, only a few DAGitty functions
#' support PAGs (for instance, \code{\link{adjustmentSets}}.
#'
#' The DAGitty parser does not perform semantic validation. That is, 
#' it will not check whether a DAG is actually acyclic, or whether all chain components
#' in a PAG are actually chordal. This is not done because it can be computationally
#' rather expensive.
#'
#' @references
#' Richardson, Thomas; Spirtes, Peter (2002), Ancestral graph Markov models.
#' \emph{The Annals of Statistics} 30(4): 962-1030.
#'
#' J. Zhang (2008), Causal Reasoning with Ancestral Graphs. 
#' \emph{Journal of Machine Learning Research} 9: 1437-1474.
#'
#' B. van der Zander and M. Liskiewicz (2016), 
#' Separators and Adjustment Sets in Markov Equivalent DAGs.
#' In \emph{Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence (AAAI'16)}, 
#' Phoenix, Arizona, USA.
#' @examples
#' # Specify a simple DAG containing one path
#' g <- dagitty("dag{ 
#'   a -> b ;
#'   b -> c ;
#'   d -> c
#'  }")
#' # Newlines and semicolons are optional
#' g <- dagitty("dag{ 
#'   a -> b b -> c c -> d
#'  }")
#' # Paths can be specified in one go; the semicolon below is
#' # optional
#' g <- dagitty("dag{ 
#'   a -> b ->c ; c -> d
#'  }")
#' # Edges can be written in reverse notation
#' g <- dagitty("dag{ 
#'   a -> b -> c <- d
#'  }")
#' # Spaces are optional as well
#' g <- dagitty("dag{a->b->c<-d}")
#' # Variable attributes can be set in square brackets
#' # Example: DAG with one exposure, one outcome, and one unobserved variable
#' g <- dagitty("dag{
#'   x -> y ; x <- z -> y
#'   x [exposure]
#'   y [outcome]
#'   z [unobserved]
#' }") 
#' # The same graph as above
#' g <- dagitty("dag{x[e]y[o]z[u]x<-z->y<-x}")
#' # A two-factor latent variable model
#' g <- dagitty("dag {
#'   X <-> Y
#'   X -> a X -> b X -> c X -> d
#'   Y -> a Y -> b Y -> c Y -> d
#' }")
#' # Curly braces can be used to "group" variables and 
#' # specify edges to whole groups of variables
#' # The same two-factor model
#' g <- dagitty("dag{ {X<->Y} -> {a b c d} }")
#' # A MAG
#' g <- dagitty("mag{ a -- x -> y <-> z }")
#' # A PDAG
#' g <- dagitty("pdag{ x -- y -- z }")
#' # A PAG
#' g <- dagitty("pag{ x @-@ y @-@ z }")  
#' @export
dagitty <- function(x, layout=FALSE){
	if(!is.character(x)){
		stop("Expecting a string to build dagitty graph!")
	}
	xv <- .getJSVar()
	tryCatch({
		.jsassign( xv, as.character(x) )
		.jsassign( xv, .jsp("DAGitty.GraphParser.parseGuess(global.",xv,").toString()") )
		r <- structure( .jsget(xv), class="dagitty" )
	}, error=function(e){
		stop( e )
	},finally={.deleteJSVar(xv)})
	r <- structure( r, class="dagitty" )
	if( layout ){
		r <- graphLayout(r)
	}
	r
}

#' Load Graph from dagitty.net
#'
#' Downloads a graph that has been built and stored online using the dagitty.net GUI.
#' Users who store graphs online will receive a unique URL for their graph, which
#' can be fed into this function to continue working with the graph in R.
#'
#' @param x dagitty model URL.
#' @export
downloadGraph <- function(x="dagitty.net/mz-Tuw9"){
	if( !requireNamespace( "base64enc", quietly=TRUE ) ){
		stop("This function requires the package 'base64enc'!")
	}
	id <- gsub( "dagitty\\.net\\/m(.*)$", "\\1", x )
	r <- base64enc::base64decode(scan(paste0("http://dagitty.net/dags/load.php?id=",id),"character"))
	if( base64enc::checkUTF8(r) ){
		dagitty( rawToChar( r ) )
	} else {
		NULL
	}
}

#' Test Graph against Data
#'
#' Derives testable implications from the given graphical model and tests them against
#' the given dataset.
#'
#' @param x the input graph, a DAG, MAG, or PDAG. Either an input graph or an explicit
#'  list of tests needs to be specified.
#' @param tests list of the precise tests to perform. If not given, the list
#'  of tests is automatically derived from the input graph. Can be used to restrict 
#'  testing to only a certain subset of tests (for instance, to test only those conditional
#'  independencies for which the conditioning set is of a reasonably low dimension, such
#'  as shown in the example). 
#' @param X vector of variable names.
#' @param Y vector of variable names.
#' @param Z vector of variable names.
#' @param data matrix or data frame containing the data.
#' @param sample.cov the sample covariance matrix; ignored if \code{data} is supplied.
#' Either \code{data} or \code{sample.cov} and \code{sample.nobs} must be supplied.
#' @param sample.nobs number of observations; ignored if \code{data} is supplied.
#' @param type character indicating which kind of local
#'  test to perform. Supported values are \code{"cis"} (linear conditional independence),
#'  \code{"cis.loess"} (conditional independence using loess regression), 
#'  \code{"cis.chisq"} (for categorical data, based on the chi-square test),
#'  \code{"cis.pillai"} (for mixed data, based on canonical correlations),
#'  \code{"tetrads"} and \code{"tetrads.type"}, where "type" is one of the items of the 
#'  tetrad typology, e.g. \code{"tetrads.within"} (see \code{\link{vanishingTetrads}}).
#'  Tetrad testing is only implemented for DAGs.
#' @param R how many bootstrap replicates for estimating confidence
#'   intervals. If \code{NULL}, then confidence intervals are based on normal
#'   approximation. For tetrads, the normal approximation is only valid in 
#'   large samples even if the data are normally distributed.
#' @param tol bound value for tolerated deviation from local test value. By default, we perform
#'    a two-sided test of the hypothesis theta=0. If this parameter is given, the test changes
#'    to abs(theta)=tol versus abs(theta)>tol.
#' @param max.conditioning.variables for conditional independence testing, this 
#'    parameter can be used to perform only those tests where the number of conditioning
#'   variables does not exceed the given value. High-dimensional
#'   conditional independence tests can be very unreliable.
#' @param abbreviate.names logical. Whether to abbreviate variable names (these are used as 
#'   row names in the returned data frame).
#' @param conf.level determines the size of confidence intervals for test
#'   statistics.
#' @param ... parameters passed on from \code{ciTest} to \code{localTests}
#' @param loess.pars list of parameter to be passed on to  \code{\link[stats]{loess}}
#'   (for \code{type="cis.loess"}), for example the smoothing range.
#'
#' \code{ciTest(X,Y,Z,data)} is a convenience function to test a single conditional independence
#' independently of a DAG.
#'
#' @details Tetrad implications can only be derived if a Gaussian model (i.e., a linear
#' structural equation model) is postulated. Conditional independence implications (CI)
#' do not require this assumption. However, both Tetrad and CI implications are tested
#' parametrically: for Tetrads, Wishart's confidence interval formula is used, whereas
#' for CIs, a Z test of zero conditional covariance (if the covariance
#' matrix is given) or a test of residual independence after linear regression
#' (it the raw data is given) is performed.
#' Both tetrad and CI tests also support bootstrapping instead of estimating parametric
#' confidence intervals.
#' For the canonical correlations approach, all ordinal variables are integer-coded, and 
#' all categorical variables are dummy-coded (omitting the dummy representing the most frequent 
#' category). To text X _||_ Y | Z, we first regress both X and Y (which now can be multivariate)
#' on Z, and then we compute the canonical correlations between the residuals. The effect size
#' is the root mean square canonical correlation (closely related to Pillai's trace, which is the 
#' root of the squared sum of all canonical correlations).
#' 
#' @examples
#' # Simulate full mediation model with measurement error of M1
#' set.seed(123)
#' d <- simulateSEM("dag{X->{U1 M2}->Y U1->M1}",.6,.6)
#' 
#' # Postulate and test full mediation model without measurement error
#' r <- localTests( "dag{ X -> {M1 M2} -> Y }", d, "cis" )
#' plotLocalTestResults( r )
#'
#' # Simulate data from example SEM
#' g <- getExample("Polzer")
#' d <- simulateSEM(g,.1,.1)
#' 
#' # Compute independencies with at most 3 conditioning variables
#' r <- localTests( g, d, "cis.loess", R=100, loess.pars=list(span=0.6),
#'         max.conditioning.variables=3 )
#' plotLocalTestResults( r )
#'
#' # Test independencies for categorical data using chi-square test
#' d <- simulateLogistic("dag{X->{U1 M2}->Y U1->M1}",2)
#' localTests( "dag{X->{M1 M2}->Y}", d, type="cis.chisq" )
#'
#' @export
localTests <- function(x=NULL, data=NULL, 
	type=c("cis","cis.loess","cis.chisq","cis.pillai",
	       "tetrads","tetrads.within","tetrads.between","tetrads.epistemic"),
	tests=NULL,
	sample.cov=NULL,sample.nobs=NULL,
	conf.level=.95,R=NULL,
	max.conditioning.variables=NULL,
	abbreviate.names=TRUE,
	tol=NULL,
	loess.pars=NULL){
	type <- match.arg(type)
	if( !is.null(x) ){
		x <- as.dagitty(x)
		if( type=="cis" ){
			.supportsTypes(x,c("dag","pdag","mag"))
		} else {
			.supportsTypes(x,c("dag"))
		}
	}
	if( is.null(x) && is.null(tests) ){
		stop("Please provide either an input graph or a list of tests")
	}
	if( !is.null(sample.cov) && is.null(sample.nobs) ){
		stop("Please provide sample size (sample.nobs)!")
	}
	if( !is.null(R) && is.null(data) ){
		stop("Bootstrapping requires raw data!")
	}
	if( is.null(R) && type=="cis.loess" ){
		stop(paste("Semi-parametric conditional independence testing",
			  "requires bootstrapping! Please use the argument R"))
	}
	if( type=="cis.chisq" && !is.null(tol) ){
		stop("Tolerance levels not implemented yet for categorical data!")
	}
	if( is.null(data) && is.null(sample.cov) ){
		stop("Please provide either data or sample covariance matrix!")
	}
	if( !is.null(tol) && !is.null(R) ){
		stop("Bootstrap only computes confidence intervals, so tol parameter cannot be used!")
	}
	type.split <- strsplit(type,"\\.")[[1]]
	type.prefix  <- type.split[1]
	type.postfix <- type.split[2]
	if( type.prefix == "tetrads" ){
		# Vanishing tetrad testing
		if( is.null( tests ) ){
			if( type == "tetrads" ){
				tests <- vanishingTetrads( x )
			} else {
				tests <- vanishingTetrads( x, type.postfix )
			}
		}
		if( length(tests) == 0 ){
			return(data.frame())
		}
		if( is.null( sample.cov ) ){
			sample.cov <- cor(data)
		} else {
			sample.cov <- cov2cor(data)
		}
		if( is.null( sample.nobs ) ){
			sample.nobs <- nrow(data)
		}
		w <- (1-conf.level)/2
		tetrad.values <- .tetradsFromCov(sample.cov,tests)
		tetrad.sample.sds <- sapply(seq_len(nrow(tests)),
			function(i) .tetrad.sem(tests[i,],sample.cov,sample.nobs))
		r <- data.frame(
				row.names=apply( tests, 1, 
					function(x) paste(x,collapse=",") ),
				estimate=tetrad.values
		)
		if( !is.null(R) ){
			requireNamespace("boot",quietly=TRUE)
			bo <- boot::boot( data,
				function(data,i) .tetradsFromData(data,tests,i), R )
			r <- cbind( r, t(apply( bo$t, 2,
				function(x) c(sd(x), quantile(x,c((1-conf.level)/2,1-(1-conf.level)/2))) )) )
			colnames(r) <- c("estimate","std.error",
				paste0(100*w,"%"),paste0(100*(1-w),"%"))
		} else {
			std.errors <- tetrad.sample.sds
			if( is.null(tol) ){
				p.values <- 2*pnorm(abs(tetrad.values/tetrad.sample.sds),
					lower.tail=FALSE)
			} else {
				tetrad.z <- tetrad.values / tetrad.sample.sds
				tol.z <- tol / tetrad.sample.sds
				p.values <- pchisq( tetrad.z^2, 1, ncp=tol.z^2, lower.tail=FALSE )
			}
			conf <- cbind( std.errors, p.values, tetrad.values+qnorm(w)*tetrad.sample.sds,
				tetrad.values+qnorm(1-w)*tetrad.sample.sds )
			r <- cbind( r, conf )
			colnames(r) <- c("estimate","std.error","p.value",
				paste0(100*w,"%"),paste0(100*(1-w),"%"))
		}
	} else if( type.prefix == "cis" ){
		# Conditional independence testing
		if( is.null(tests) ){
			tests <- impliedConditionalIndependencies( x )
		}
		if( !is.null(max.conditioning.variables) ){
			tests <- Filter(function(x) length(x$Z)<=max.conditioning.variables, 
					tests)
		}
		if( length(tests) == 0 ){
			return(data.frame())
		}
		row.names <- sapply(tests,function(x){ as.character(x,abbreviate.names=abbreviate.names) })
		if( !is.null(R) ){
			if( type == "cis" ){
				f <- function(i) .ci.test.lm.perm(data,i,conf.level,R)
			} else if( type.postfix=="loess" ){
				f <- function(i) .ci.test.loess.perm(data,i,conf.level,R,loess.pars)
			} else if( type.postfix=="chisq" ){
				f <- function(i) .ci.test.chisq.perm(data,i,conf.level,R)
			} else {
				stop("Illegal arguments!")
			}
			r <- as.data.frame(
				row.names=row.names,
				t(sapply( tests, f ))
			)
		} else {
			if( type == "cis" ){
				f <- function(i) 
					.ci.test.covmat(sample.cov,sample.nobs,i,conf.level,tol)
				if( !is.null(data) ){
					sample.cov <- cov2cor(cov(data))
					sample.nobs <- nrow(data)
				}
			} else if( type.postfix == "chisq" ){
				f <- function(i) 
					.ci.test.chisq(data,i,conf.level)
			} else if( type.postfix == "pillai" ){
				f <- function(i)
					.ci.test.pillai(data,i,conf.level)
			}
			r <- as.data.frame(
				row.names=row.names,
				t(sapply( tests, f ))
			)
		}
	} 
	return(r)
}

#' @rdname localTests
#' @export
ciTest <- function(X,Y,Z=NULL,data,...){
	localTests( data=data[,c(X,Y,Z)], tests=structure(
		list(structure(list(X=X,Y=Y,Z=Z),class="dagitty.ci")),
		class="dagitty.cis"), ...)
}

#' Plot Results of Local Tests
#'
#' Generates a summary plot of the results of local tests
#' (see \link{localTests}). For each test, a test statistic and
#' the confidence interval are shown.
#'
#' @param x data frame; results of the local tests as returned by 
#' \link{localTests}. 
#' @param xlab X axis label.
#' @param xlim numerical vector with 2 elements; range of X axis.
#' @param axis.pars arguments to be passed on to \code{\link{axis}}
#'  when generating the Y axis for the plot.
#' @param sort.by.statistic logical. Sort the rows of \code{x} by
#'  the absolute value of the test statistic before plotting.
#' @param n plot only the n tests for which the absolute value of 
#'  the test statistics diverges most from 0.
#' @param auto.margin logical. Computes the left margin to fit the 
#'  Y axis labels.
#' @param ... further arguments to be passed on to \code{\link{plot}}.
#'
#' @examples
#' d <- simulateSEM("dag{X->{U1 M2}->Y U1->M1}",.6,.6)
#' par(mar=c(2,8,1,1)) # so we can see the test names
#' plotLocalTestResults(localTests( "dag{ X -> {M1 M2} -> Y }", d, "cis" ))
#'
#' @export
plotLocalTestResults <- function(x,xlab="test statistic (95% CI)",
	xlim=range(x[,c(ncol(x)-1,ncol(x))]),sort.by.statistic=TRUE,
	n=Inf,axis.pars=list(las=1), auto.margin=TRUE, ...){
	# Sort by first column, which is expected to contain a test statistic.
	x <- x[order(abs(x[[1]]),decreasing=TRUE),]
	if( is.finite(n) && n > 0 && n < nrow(x) ){
		x <- x[1:n,]
	}
	y <- seq_len(nrow(x))

	if (auto.margin){
		oldpar <- graphics::par(no.readonly=TRUE)
		on.exit(graphics::par(oldpar))
		lmargin <- max(strwidth(rownames(x), "inches")) + 0.2
		if ( lmargin > (par("fin")[1] / 2) ){
			lmargin = par("fin")[1] / 2
			warning("Warning: Plotting area too narrow to fit the Y axis labels.")
		}
		newmar <- oldpar$mai
		newmar[2] <- lmargin
		par(mai=newmar)
	}

	plot( x[,1], y,xlab=xlab,xlim=xlim, yaxt="n", ylab="", ... )
	do.call( axis, c( list( 2, at=y, labels=rownames(x)), axis.pars ) )
	segments( x[,ncol(x)-1], y, x[,ncol(x)], y )
	#segments( seq_len(nrow(x))+.1, x[,1]-2*x[,2], 
	#	y1=x[,1]+2*x[,2], col=2 )
	abline( v=0 )
}

#' Show Paths
#'
#' Returns a list with two compontents: \code{path} gives the actual
#' paths, and \code{open} shows whether each path is open (d-connected)
#' or closed (d-separated).
#'
#' @param x the input graph, a DAG, PDAG, or MAG.
#' @param from name(s) of first variable(s). 
#' @param to name(s) of last variable(s). 
#' @param Z names of variables to condition on for determining open
#' paths.
#' @param limit maximum amount of paths to show. In general, the number of paths grows
#' exponentially with the number of variables in the graph, such that path inspection
#' is not useful except for the most simple models.
#' @param directed logical; should only directed (i.e., causal) paths 
#' be shown?
#'
#' @examples
#' sum( paths(backDoorGraph(getExample("Shrier")))$open ) # Any open Back-Door paths?
#'
#' @export
paths <- function(x,from=exposures(x),to=outcomes(x),Z=list(),limit=100,directed=FALSE){
	x <- as.dagitty(x)
	.supportsTypes(x,c("dag","mag","pdag"))
	.checkAllNames( x, c(from,to,Z) )
	xv <- .getJSVar()
	xv2 <- .getJSVar()
	exposures(x) <- from
	outcomes(x) <- to
	if( length(exposures(x)) == 0 || length(outcomes(x)) == 0 ){
		stop("Both start end end node(s) need to be set!")
	}
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv2, as.list(Z) )
		.jsassign( xv, .jsp("DagittyR.paths2r(DAGitty.GraphAnalyzer.listPaths(global.",
			xv,",",tolower(directed),",",limit,"),",xv2,",",
			xv,")" ) )
		r <- .jsget(xv)
	},finally={.deleteJSVar(xv);.deleteJSVar(xv2)})
	r
}

#' d-Separation
#'
#' A set Z d-separates a path p if (1) Z contains a non-collider
#' on p, e.g. x->m->y with \code{Z=c("m")}; or (2) some collider on p is not
#' on Z, e.g. x->m<-y with \code{Z=c()}.
#'
#' @param x the input graph, a DAG, PDAG, or MAG.
#' @param X vector of variable names.
#' @param Y vector of variable names.
#' @param Z vector of variable names.
#'
#' \code{dseparated(x,X,Y,Z)} checks if all paths between X and Y are 
#' d-separated by Z.
#'
#' \code{dconnected(x,X,Y,Z)} checks if at least one path between X and Y
#' is not d-separated by Z.
#'
#' @details
#' The functions also work for mixed graphs with directed, undirected,
#' and bidirected edges. The definition of a collider in such graphs
#' is: a node where two arrowheads collide, e.g. x<->m<-y but not
#' x->m--y.
#'
#' @examples
#' dconnected( "dag{x->m->y}", "x", "y", c() ) # TRUE
#' dconnected( "dag{x->m->y}", "x", "y", c("m") ) # FALSE
#' dseparated( "dag{x->m->y}", "x", "y", c() ) # FALSE 
#' dseparated( "dag{x->m->y}", "x", "y", c("m") ) # TRUE
#'
#' @export
dconnected <- function(x,X,Y=list(),Z=list()){
	x <- as.dagitty(x)
	.supportsTypes(x,c("dag","pdag","mag","pag"))
	if( length(Z) == 0 ){
		Z <- list()
	}
	.checkAllNames( x, c(X,Y,Z) )
	xv <- .getJSVar()
	Xv <- .getJSVar()
	Yv <- .getJSVar()
	Zv <- .getJSVar()
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( Xv, as.list(X) )
		.jsassign( Yv, as.list(Y) )
		.jsassign( Zv, as.list(Z) )
		.jsassign( xv, .jsp("DagittyR.dconnected(",xv,",",Xv,",",Yv,",",Zv,")") )
		r <- .jsget(xv)
	},finally={
		.deleteJSVar(xv)
		.deleteJSVar(Xv)
		.deleteJSVar(Yv)
		.deleteJSVar(Zv)
	})
	r
}

#' Get Topological Ordering of DAG
#' 
#' Computes a topological ordering of the nodes, i.e., a number for each node
#' such that every node's number is smaller than the one of all its descendants.
#' Bidirected edges (<->) are ignored.
#'
#' @param x the input graph, a DAG
#'
#' @export
topologicalOrdering <- function( x ){
	x <- as.dagitty( x )
	.supportsTypes( x, "dag" )
	xv <- .getJSVar()
	r <- NULL
	tryCatch({
		.jsassigngraph( xv, x )
		.jsassign( xv, .jsp("DAGitty.GraphAnalyzer.topologicalOrdering(global.",xv,")") )
		r <- .jsget( xv )
	}, 
	error=function(e) stop(e),
	finally={.deleteJSVar(xv)})
	r
}


#' @rdname dconnected
#' @export
dseparated <- function(x,X,Y=list(),Z=list()){
	if( length(Y) > 0 ){
		!dconnected(x,X,Y,Z)
	} else {
		setdiff(names(x),dconnected(x,X,Y,Z))
	}
}

#' Generate DAG at Random
#'
#' Generates a random DAG with N variables called x1,...,xN. For each
#' pair of variables xi,xj with i<j, an edge i->j will be present with
#' probability p.
#'
#' @param N desired number of variables.
#' @param p connectivity parameter, a number between 0 and 1.
#' @export
randomDAG <- function( N, p ){
	N <- as.integer(N)
	if( N < 1 ){
		stop("N must be positive!")
	}
	p <- as.double(p)
	if( p < 0 || p > 1 ){
		stop("p is not a probability!")
	}
	xv <- .getJSVar()
	pv <- .getJSVar()
	tryCatch({
	.jsassign( pv, p )
	.jsassign( xv, as.list(paste0("x",seq_len(N))) )
	.jsassign( xv, .jsp("DAGitty.GraphGenerator.randomDAG(",xv,",",pv,").toString()") )
	r <- .jsget(xv)
	},finally={
		.deleteJSVar(xv)
		.deleteJSVar(pv)
	})
	as.dagitty(r)
}

#' Generate Complete DAG
#'
#' Generates a complete DAG on the given variable names. The order
#' in which the variables are given corresponds to the topological ordering
#' of the DAG. Returns a named list.
#'
#' @param x variable names. Can also be a positive integer, in which case 
#' the variables will be called  x1,...,xN.
#' @export
completeDAG <- function( x ){
	if( is.numeric(x) ){
		x <- paste0("x",seq_len(x))
	}
	xv <- .getJSVar()
	tryCatch({
	.jsassign( xv, as.list(x) )
	.jsassign( xv, .jsp("GraphGenerator.randomDAG(",xv,",1).toString()") )
	r <- .jsget(xv)
	},finally={
		.deleteJSVar(xv)
	})
	as.dagitty(r)
}

#' @export
print.dagitty.ivs <- function( x, prefix="", ... ){
	for( i in x ){
		cat( prefix, i$I )
		if( length( i$Z > 0 ) ){
			cat( " | ", paste(i$Z,collapse=", ") )
		}
		cat( "\n" )
	}
}

#' @export
print.dagitty.sets <- function( x, prefix="", ... ){
	for( i in x ){
		if( length(i) == 0 ){
			writeLines("{}")
		} else {
			l <- paste("{ ",paste(i,collapse=", ")," }\n")
			writeLines( strwrap( l, exdent=2 ) )
		}
	}
}

#' @export
as.character.dagitty.ci <- function( x, abbreviate.names=TRUE, ... ){
	nX <- length(x$X)
	nY <- length(x$Y)
	nn <- c(x$X, x$Y, x$Z)
	if( abbreviate.names ){
		nn <- abbreviate(nn)
	}
	r <- paste0( paste(nn[seq_along(x$X)],collapse=", "), " _||_ ", 
		paste(nn[seq_along(x$Y)+nX],collapse=", ") )
	if( length( x$Z > 0 ) ){
		r <- paste0( r, " | ", paste(nn[seq_along(x$Z)+nX+nY],collapse=", ") )
	}
	r
}

#' @export
print.dagitty.ci <- function( x, ... ){
	cat( as.character( x ),"\n" )
}

#' @export
print.dagitty.cis <- function( x, ... ){
	for( i in seq_along(x) ){
		cat( as.character( x[[i]] ) )
		cat("\n")
	}
}

#' @export
as.list.dagitty.cis <- function( x, ... ) structure( x, class="list" )

#' @export
`[.dagitty.cis` <- function(x,y) structure(as.list(x)[y],class="dagitty.cis")


#' @export
as.list.dagitty.sets <- function( x, ... ) structure( x, class="list" )

#' @export
`[.dagitty.sets` <- function(x,y) structure(as.list(x)[y],class="dagitty.sets")

#' @export
as.list.dagitty.ivs <- function( x, ... ) structure( x, class="list" )

#' @export
`[.dagitty.ivs` <- function(x,y) structure(as.list(x)[y],class="dagitty.ivs")