From 53b52f356a6959a1b16263e375ea6544f9cc6039 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 9 Jul 2025 15:11:59 +0200 Subject: [PATCH 1/2] added description of formats for read_graph --- R/foreign.R | 59 +++++++++++++++++++++++++++++++++++++++ man/read_graph.Rd | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/R/foreign.R b/R/foreign.R index 94dd4632f2e..1cf9e52016c 100644 --- a/R/foreign.R +++ b/R/foreign.R @@ -236,6 +236,65 @@ write.graph.fromraw <- function(buffer, file) { #' there is no information in the file about the directedness of the graph. #' } #' } +#' @section GraphML format: +#' GraphML is an XML-based file format for representing various types of graphs. +#' Currently only the most basic import functionality is implemented in igraph: +#' it can read GraphML files without nested graphs and hyperedges. +#' \describe{ +#' \item{index}{Integer, specifies which graph to read from a GraphML file +#' containing multiple graphs. Defaults to 0 for the first graph.} +#' } +#' @section LGL format: +#' The .lgl format is used by the Large Graph Layout visualization software (<https://lgl.sourceforge.net>), it can describe undirected optionally weighted graphs +#' \describe{ +#' \item{names}{Logical, whether to add vertex names as a vertex attribute +#' called "name". Default is TRUE.} +#' \item{weights}{ +#' Whether to add the weights of the edges to the graph +#' as an edge attribute called “weight”. +#' `"yes"` adds the weights (even if they are not present in the file, +#' in this case they are assumed to be zero). +#' `"no"` does not add any edge attribute. +#' `"auto"` (the default) adds the attribute if and only +#' if there is at least one explicit edge weight in the input file. +#' } +#' \item{directed}{Logical, whether to create a directed graph. Default is FALSE.} +#' } +#' +#' @section DIMACS format: +#' This is a line-oriented text file (ASCII) format. +#' The first character of each line defines the type of the line. 
+#' If the first character is c the line is a comment line and it is ignored. +#' There is one problem line (p in the file), +#' it must appear before any node and arc descriptor lines. +#' The problem line has three fields separated by spaces: the problem type (max or edge), +#' the number of vertices, and number of edges in the graph. In MAX problems, +#' exactly two node identification lines are expected (n), one for the source, and one for the target vertex. +#' These have two fields: the ID of the vertex and the type of the vertex, either s ( = source) or t ( = target). +#' Arc lines start with a and have three fields: the source vertex, the target vertex and the edge capacity. +#' In EDGE problems, there may be a node line (n) for each node. It specifies the node index and an +#' integer node label. Nodes for which no explicit label was specified will use their index as label. +#' In EDGE problems, each edge is specified as an edge line (e). +#' \describe{ +#' \item{directed}{Logical, whether to create a directed graph. Default is TRUE.} +#' } +#' +#' @section DL format: +#' This is a simple textual file format used by UCINET. See <http://www.analytictech.com/networks/dataentry.htm> for examples. All the forms described here are supported by igraph. +#' Vertex names and edge weights are also supported and they are added as attributes. +#' (If an attribute handler is attached.) +#' Note the specification does not mention whether the format is case sensitive or not. +#' For igraph DL files are case sensitive, i.e. Larry and larry are not the same. +#' \describe{ +#' \item{directed}{Logical, whether to create a directed graph. Default is TRUE.} +#' } +#' +#' @section GraphDB format: +#' This is a binary format, used in the ARG Graph Database for isomorphism testing. For more information, see +#' \describe{ +#' \item{directed}{Logical, whether to create a directed graph. 
Default is TRUE.} +#' } +#' #' @author Gabor Csardi \email{csardi.gabor@@gmail.com} #' @seealso [write_graph()] #' @keywords graphs diff --git a/man/read_graph.Rd b/man/read_graph.Rd index 9170a6e5f08..3b8168b11b2 100644 --- a/man/read_graph.Rd +++ b/man/read_graph.Rd @@ -106,6 +106,76 @@ there is no information in the file about the directedness of the graph. } } +\section{GraphML format}{ + +GraphML is an XML-based file format for representing various types of graphs. +Currently only the most basic import functionality is implemented in igraph: +it can read GraphML files without nested graphs and hyperedges. +\describe{ +\item{index}{Integer, specifies which graph to read from a GraphML file +containing multiple graphs. Defaults to 0 for the first graph.} +} +} + +\section{LGL format}{ + +The .lgl format is used by the Large Graph Layout visualization software (\url{https://lgl.sourceforge.net}), it can describe undirected optionally weighted graphs +\describe{ +\item{names}{Logical, whether to add vertex names as a vertex attribute +called "name". Default is TRUE.} +\item{weights}{ +Whether to add the weights of the edges to the graph +as an edge attribute called “weight”. +\code{"yes"} adds the weights (even if they are not present in the file, +in this case they are assumed to be zero). +\code{"no"} does not add any edge attribute. +\code{"auto"} (the default) adds the attribute if and only +if there is at least one explicit edge weight in the input file. +} +\item{directed}{Logical, whether to create a directed graph. Default is FALSE.} +} +} + +\section{DIMACS format}{ + +This is a line-oriented text file (ASCII) format. +The first character of each line defines the type of the line. +If the first character is c the line is a comment line and it is ignored. +There is one problem line (p in the file), +it must appear before any node and arc descriptor lines. 
+The problem line has three fields separated by spaces: the problem type (max or edge), +the number of vertices, and number of edges in the graph. In MAX problems, +exactly two node identification lines are expected (n), one for the source, and one for the target vertex. +These have two fields: the ID of the vertex and the type of the vertex, either s ( = source) or t ( = target). +Arc lines start with a and have three fields: the source vertex, the target vertex and the edge capacity. +In EDGE problems, there may be a node line (n) for each node. It specifies the node index and an +integer node label. Nodes for which no explicit label was specified will use their index as label. +In EDGE problems, each edge is specified as an edge line (e). +\describe{ +\item{directed}{Logical, whether to create a directed graph. Default is TRUE.} +} +} + +\section{DL format}{ + +This is a simple textual file format used by UCINET. See \url{http://www.analytictech.com/networks/dataentry.htm} for examples. All the forms described here are supported by igraph. +Vertex names and edge weights are also supported and they are added as attributes. +(If an attribute handler is attached.) +Note the specification does not mention whether the format is case sensitive or not. +For igraph DL files are case sensitive, i.e. Larry and larry are not the same. +\describe{ +\item{directed}{Logical, whether to create a directed graph. Default is TRUE.} +} +} + +\section{GraphDB format}{ + +This is a binary format, used in the ARG Graph Database for isomorphism testing. For more information, see \url{https://mivia.unisa.it/datasets/graph-database/arg-database/} +\describe{ +\item{directed}{Logical, whether to create a directed graph. 
Default is TRUE.} +} +} + \seealso{ \code{\link[=write_graph]{write_graph()}} From 86b63a161018312452bf569981a1a688c2a22754 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 9 Jul 2025 15:23:43 +0200 Subject: [PATCH 2/2] added all write formats --- R/foreign.R | 66 +++++++++++++++++++++++++++++++++++++++- man/read_graph.Rd | 5 +++ man/write_graph.Rd | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) diff --git a/R/foreign.R b/R/foreign.R index 1cf9e52016c..a127941d2d0 100644 --- a/R/foreign.R +++ b/R/foreign.R @@ -288,7 +288,8 @@ write.graph.fromraw <- function(buffer, file) { #' \describe{ #' \item{directed}{Logical, whether to create a directed graph. Default is TRUE.} #' } -#' +#' @section GML format: +#' GML is a quite general textual format. For the specifics of the implementation, see the linked documentation of the C library. #' @section GraphDB format: #' This is a binary format, used in the ARG Graph Database for isomorphism testing. For more information, see #' \describe{ @@ -381,6 +382,69 @@ read_graph <- function( #' @section Pajek format: The `pajek` format is provided for interoperability #' with the Pajek software only. Since the format does not have a formal #' specification, it is not recommended for general data exchange or archival. +#' @section LGL format: +#' The .lgl format is used by the Large Graph Layout visualization software (<https://lgl.sourceforge.net>), it can describe undirected optionally weighted graphs. +#' \describe{ +#' \item{names}{The name of a vertex attribute to use for vertex names, or +#' NULL to use numeric IDs.} +#' \item{weights}{The name of an edge attribute to use for edge weights, or +#' NULL to omit weights.} +#' \item{isolates}{Logical, whether to include isolated vertices in the file. +#' Default is FALSE.} +#' } +#' +#' @section DIMACS format: +#' This is a line-oriented text file (ASCII) format. +#' The first character of each line defines the type of the line. 
+#' If the first character is c the line is a comment line and it is ignored. +#' There is one problem line (p in the file), +#' it must appear before any node and arc descriptor lines. +#' The problem line has three fields separated by spaces: the problem type (max or edge), +#' the number of vertices, and number of edges in the graph. In MAX problems, +#' exactly two node identification lines are expected (n), one for the source, and one for the target vertex. +#' These have two fields: the ID of the vertex and the type of the vertex, either s ( = source) or t ( = target). +#' Arc lines start with a and have three fields: the source vertex, the target vertex and the edge capacity. +#' In EDGE problems, there may be a node line (n) for each node. It specifies the node index and an +#' integer node label. Nodes for which no explicit label was specified will use their index as label. +#' In EDGE problems, each edge is specified as an edge line (e). +#' \describe{ +#' \item{source}{Numeric ID of the source vertex.} +#' \item{target}{Numeric ID of the target vertex.} +#' \item{capacity}{The name of an edge attribute to use for edge capacities, +#' or NULL to use the "capacity" attribute if it exists.} +#' } +#' +#' @section GML format: +#' GML is a quite general textual format. +#' \describe{ +#' \item{id}{Optional numeric vertex IDs to use.} +#' \item{creator}{Optional string specifying the creator of the file.} +#' } +#' +#' @section GraphML format: +#' GraphML is an XML-based file format for representing various types of graphs. +#' When a numerical attribute value is NaN, it will be omitted from the file. +#' This function assumes that non-ASCII characters in attribute names and string +#' attribute values are UTF-8 encoded. If this is not the case, the resulting XML file will be invalid. Control characters, i.e. character codes up to and including 31 (with the exception of tab, cr and lf), are not allowed. 
+#' \describe{ +#' \item{prefixAttr}{Logical, whether to prefix attribute names to ensure +#' uniqueness across vertex/edge/graph attributes. Default is TRUE.} +#' } +#' +#' @section LEDA format: +#' This function writes a graph to an output stream in LEDA format. See <http://www.algorithmic-solutions.info/leda_guide/graphs/leda_native_graph_fileformat.html> +#' The support for the LEDA format is very basic at the moment; igraph writes only the LEDA graph section which supports one selected vertex and edge +#' attribute and no layout information or visual attributes. +#' \describe{ +#' \item{vertex.attr}{Name of vertex attribute to include in the file.} +#' \item{edge.attr}{Name of edge attribute to include in the file.} +#' } +#' @section DOT format: +#' DOT is the format used by the widely known GraphViz software, see http://www.graphviz.org for details. The grammar of the DOT format can be found here: http://www.graphviz.org/doc/info/lang.html +#' This is only a preliminary implementation, no visualization information is written. +#' This format is meant solely for interoperability with Graphviz. It is not recommended for +#' data exchange or archival. +#' #' @author Gabor Csardi \email{csardi.gabor@@gmail.com} #' @seealso [read_graph()] #' @references Adai AT, Date SV, Wieland S, Marcotte EM. LGL: creating a map of diff --git a/man/read_graph.Rd b/man/read_graph.Rd index 3b8168b11b2..05d08030b22 100644 --- a/man/read_graph.Rd +++ b/man/read_graph.Rd @@ -168,6 +168,11 @@ For igraph DL files are case sensitive, i.e. Larry and larry are not the same. } } +\section{GML format}{ + +GML is a quite general textual format. For the specifics of the implementation, see the linked documentation of the C library. +} + \section{GraphDB format}{ This is a binary format, used in the ARG Graph Database for isomorphism testing. 
For more information, see \url{https://mivia.unisa.it/datasets/graph-database/arg-database/} diff --git a/man/write_graph.Rd b/man/write_graph.Rd index 363ce6328f9..f1f4a99ca1d 100644 --- a/man/write_graph.Rd +++ b/man/write_graph.Rd @@ -62,6 +62,82 @@ with the Pajek software only. Since the format does not have a formal specification, it is not recommended for general data exchange or archival. } +\section{LGL format}{ + +The .lgl format is used by the Large Graph Layout visualization software (\url{https://lgl.sourceforge.net}), it can describe undirected optionally weighted graphs. +\describe{ +\item{names}{The name of a vertex attribute to use for vertex names, or +NULL to use numeric IDs.} +\item{weights}{The name of an edge attribute to use for edge weights, or +NULL to omit weights.} +\item{isolates}{Logical, whether to include isolated vertices in the file. +Default is FALSE.} +} +} + +\section{DIMACS format}{ + +This is a line-oriented text file (ASCII) format. +The first character of each line defines the type of the line. +If the first character is c the line is a comment line and it is ignored. +There is one problem line (p in the file), +it must appear before any node and arc descriptor lines. +The problem line has three fields separated by spaces: the problem type (max or edge), +the number of vertices, and number of edges in the graph. In MAX problems, +exactly two node identification lines are expected (n), one for the source, and one for the target vertex. +These have two fields: the ID of the vertex and the type of the vertex, either s ( = source) or t ( = target). +Arc lines start with a and have three fields: the source vertex, the target vertex and the edge capacity. +In EDGE problems, there may be a node line (n) for each node. It specifies the node index and an +integer node label. Nodes for which no explicit label was specified will use their index as label. +In EDGE problems, each edge is specified as an edge line (e). 
+\describe{ +\item{source}{Numeric ID of the source vertex.} +\item{target}{Numeric ID of the target vertex.} +\item{capacity}{The name of an edge attribute to use for edge capacities, +or NULL to use the "capacity" attribute if it exists.} +} +} + +\section{GML format}{ + +GML is a quite general textual format. +\describe{ +\item{id}{Optional numeric vertex IDs to use.} +\item{creator}{Optional string specifying the creator of the file.} +} +} + +\section{GraphML format}{ + +GraphML is an XML-based file format for representing various types of graphs. +When a numerical attribute value is NaN, it will be omitted from the file. +This function assumes that non-ASCII characters in attribute names and string +attribute values are UTF-8 encoded. If this is not the case, the resulting XML file will be invalid. Control characters, i.e. character codes up to and including 31 (with the exception of tab, cr and lf), are not allowed. +\describe{ +\item{prefixAttr}{Logical, whether to prefix attribute names to ensure +uniqueness across vertex/edge/graph attributes. Default is TRUE.} +} +} + +\section{LEDA format}{ + +This function writes a graph to an output stream in LEDA format. See \url{http://www.algorithmic-solutions.info/leda_guide/graphs/leda_native_graph_fileformat.html} +The support for the LEDA format is very basic at the moment; igraph writes only the LEDA graph section which supports one selected vertex and edge +attribute and no layout information or visual attributes. +\describe{ +\item{vertex.attr}{Name of vertex attribute to include in the file.} +\item{edge.attr}{Name of edge attribute to include in the file.} +} +} + +\section{DOT format}{ + +DOT is the format used by the widely known GraphViz software, see http://www.graphviz.org for details. The grammar of the DOT format can be found here: http://www.graphviz.org/doc/info/lang.html +This is only a preliminary implementation, no visualization information is written. 
+This format is meant solely for interoperability with Graphviz. It is not recommended for +data exchange or archival. +} + \examples{ g <- make_ring(10)