NOTES ON STATISTICS, PROBABILITY and MATHEMATICS


Graphs:


For parameter changes and options of the igraph() package, this is a reference.

To generate a random graph with the package igraph() the call is as follows:


GENERATING RANDOM UNDIRECTED GRAPHS:

library(igraph)
set.seed(5)
g = sample_gnp(30, 0.1, directed=F)
plot(g, 
     vertex.size=20, 
     vertex.color ='black',
     vertex.label.cex=1.2,
     vertex.label.color='white',
     edge.width=2,
     edge.color='red')

There number of nodes or vertices is the first entry (\(30\)), and the probability of having an edge or connection between any two vertices is \(0.1.\) The graph is un-directed.

The plotting is rather random, but it can be controlled as follows:

plot(g, 
     vertex.size=20, 
     vertex.color ='blue',
     vertex.label.cex=1.2,
     vertex.label.color='white',
     edge.width=2,
     edge.color=rgb(.2,.9,.3,.8),
     layout=layout_in_circle(g))

It is possible to take a random walk through the graph as follows:

# Graph name, starting point, number of steps, what to do if stuck:
random_walk(g, 9, 10, stuck='return')
## + 10/30 vertices, from ee2d3f7:
##  [1]  9 29 22  4 15 20 25 17 16 17

The graph can be saved with the command write.graph(g, file='Graph.txt', format="edgelist").


Analyze a Network from Existing Data:

From this presentation the scenes in which two given characters in Les Mis appear together is in a file called EL, whereas the number of edges coming off each character is in NL:

EL = read.csv('https://raw.githubusercontent.com/RInterested/DATASETS/gh-pages/lesmis-el.csv')
head(EL)
##     Character1  Character2 Weight
## 1 Gillenormand JeanValjean      2
## 2      Zephine   Listolier      3
## 3         Joly     Feuilly      5
## 4       Brevet       Judge      2
## 5   Bamatabois JeanValjean      2
## 6     Gavroche JeanValjean      1
NL = read.csv('https://raw.githubusercontent.com/RInterested/DATASETS/gh-pages/lesmis-nl.csv')
head(NL)
##     Character1 Size
## 1 Gillenormand   29
## 2      Zephine   24
## 3         Joly   43
## 4       Brevet   11
## 5   Bamatabois   11
## 6     Gavroche   56
gr = graph_from_data_frame(d = EL, vertices = NL, directed=F)

plot(gr, 
     vertex.label.cex = ifelse(vertex.attributes(gr)$name%in%c('JeanValjean','Cosette','Marius','Javert', 'Fantine', 'Gavroche'), 4, 1),
          vertex.label.color = ifelse(vertex.attributes(gr)$name%in%c('JeanValjean','Cosette','Marius','Javert', 'Fantine', 'Gavroche'), 'red', NA),
          vertex.color = ifelse(vertex.attributes(gr)$name%in%c('JeanValjean','Cosette','Marius','Javert', 'Fantine', 'Gavroche'), 'navy', 'yellow'),
     vertex.frame.color='yellow',
     edge.color="firebrick", 
     vertex.label.color="black",
     layout = layout_with_kk(gr))

Taking a look at the network:

gr
## IGRAPH efc6bf0 UN-- 77 508 -- 
## + attr: name (v/c), Size (v/n), Weight (e/n)
## + edges from efc6bf0 (vertex names):
##  [1] Gillenormand   --JeanValjean                          
##  [2] Zephine        --Listolier                            
##  [3] Joly           --Feuilly                              
##  [4] Brevet         --Judge                                
##  [5] Bamatabois     --JeanValjean                          
##  [6] Gavroche       --JeanValjean                          
##  [7] MadameHucheloup--Courfeyrac                           
##  [8] Gavroche       --Javert                               
## + ... omitted several edges

We see it is an UN undirected (N) graph with names (N). There are \(77\) nodes, \(508\) edges, and the types of links.

Also we can analyze the network to see who is more of an influencer by looking at how many edges arrive at each vertex. If it were a directed graph mode='all' would be different from arriving edges, i.e. mode='in'.

sort(degree(gr, mode='all'), decreasing=T)[1:10]
##           JeanValjean              Gavroche                Marius 
##                    72                    44                    38 
##                Javert            Thenardier               Fantine 
##                    34                    32                    30 
##              Enjolras            Courfeyrac LAigleDeMeauxBossuet  
##                    30                    26                    26 
##                  Joly 
##                    24

We can look at edge.attributes(gr) or vertex.attributes(gr) or visualize the data as a matrix gr[].


Graph Cliques and Ramsey Numbers:

Here is a naive attempt at getting extremely lucky and being able to tighten the bounds for the Ramsey number \(R(5,5)\), which is currently bounded between \([43, 49]\) first by randomly generating red edges using a biased coin flip:

set.seed(0)
colors = c("darkred","blue4")
vcol = 'gold'
vlab = 'white'

# 5 5   [43, 49]    Exoo 1989b, McKay and Radziszowski 1995
n = 43
# R(5,5)
cl = 5
m = matrix(rbinom(n^2,1,.7),nrow=n)
m[lower.tri(m)] = 0  # It is an undirected graft
diag(m) = 0 # Avoiding self edges
g = graph_from_adjacency_matrix(m, mode='undirected')


plot(g, layout = layout_in_circle(g),
     edge.curved=0, 
     edge.color=colors[1],
     vertex.size=10, 
     vertex.color=vcol,
     vertex.label.color='red',
     vertex.label.cex=2)

The blue edges are similarly created as:

m.bar = matrix(rep(0, nrow(m)^2), nrow=nrow(m))
for(i in 1:nrow(m)){for(j in 1:ncol(m)){m.bar[i,j] <- ifelse(m[i,j]==0,1,0)}}
m.bar[lower.tri(m.bar)] = 0  
diag(m.bar) = 0
g.bar = graph_from_adjacency_matrix(m.bar, mode='undirected')
 
plot(g.bar, 
     layout=layout_in_circle(g.bar), 
     edge.curved=0, 
     edge.color=colors[2],
     vertex.color=vcol,
     vertex.size=10,
     vertex.label.cex=2,
     vertex.label.color="red")

These two colorations can now be merged into one complete graph:

plot(g, layout = layout_in_circle(g),
     edge.curved=0, 
     edge.color=colors[1],
     vertex.size=10, 
     vertex.color=vcol,
     vertex.label.color='red',
     vertex.label.cex=2)
plot(g.bar, 
     layout=layout_in_circle(g.bar), 
     edge.curved=0, 
     edge.width=2,
     edge.color=colors[2],
     vertex.color=vcol,
     vertex.size=10,
     vertex.label.cex=2,
     vertex.label.color="red",
     add=T)

We can check it is complete remembering that the edges of a complete graph with \(n\) vertices are \(n (n-1)/2:\)

u = union(g,g.bar)

print(paste("Is the graph complete? ", length(E(u))==n*(n-1)/2))
## [1] "Is the graph complete?  TRUE"
# Also checkable by finding a ratio # edges / possible # edges of 1:

edge_density(u, loops=F)
## [1] 1

How many red and blue cliques of size \(5\) are there:

red.clust = cliques(g, min=cl, max=cl)
print(paste("How many red clicks of size", cl, "are there?", length(red.clust),"."))
## [1] "How many red clicks of size 5 are there? 22360 ."
blue.clust = cliques(g.bar, min=cl, max=cl)
print(paste("How many blue clicks of size", cl, "are there?", length(blue.clust),'.'))
## [1] "How many blue clicks of size 5 are there? 1 ."

And we can look for the largest cliques:

print(paste("The maximum red clique size is", clique_num(g)))
## [1] "The maximum red clique size is 11"
print(paste("The maximum blue clique size is", clique_num(g.bar)))
## [1] "The maximum blue clique size is 5"

Arcgraph

A cool alternative is the arcgraph:

library(igraph)
library(devtools)
library(arcdiagram)

mis_file = "https://raw.githubusercontent.com/RInterested/DATASETS/gh-pages/lesmiserables.gml"
mis_graph = read.graph(mis_file, format="gml")

edgelist = get.edgelist(mis_graph)
vlabels = get.vertex.attribute(mis_graph, "label")
vgroups = get.vertex.attribute(mis_graph, "group")
vfill = get.vertex.attribute(mis_graph, "fill")
vborders = get.vertex.attribute(mis_graph, "border")
degrees = degree(mis_graph)
values = get.edge.attribute(mis_graph, "value")

library(reshape)
library(dplyr)

x = data.frame(vgroups, degrees, vlabels, ind=1:vcount(mis_graph))
y = arrange(x, desc(vgroups), desc(degrees))

new_ord = y$ind

arcplot(edgelist, ordering=new_ord, labels=vlabels, cex.labels=0.8,
        show.nodes=TRUE, col.nodes=vborders, bg.nodes=vfill,
        cex.nodes = log(degrees)+0.5, pch.nodes=21,
        lwd.nodes = 2, line=-0.5,
        col.arcs = hsv(0, 0, 0.2, 0.25), lwd.arcs = 1.5 * values)


Home Page

NOTE: These are tentative notes on different topics for personal use - expect mistakes and misunderstandings.