1. Basic Manipulation & Metrics

1.1. Network objects

We will use the SNA data, which were collected form a US discussion forum. The rows indicate replies between users. All users have self-reported political ideology. The data were stored in edgelist format.

load("D:/Dropbox/Teaching Materials/CUHK Courses/Digital Research/Slides/2018/Labs/Network analysis/SNA.Rdata")
nodes = SNA[,c("sender","leanR")]
nodes = nodes[!duplicated(nodes$sender),] #1178 users
table(nodes$leanR)
## 
## central    left   right 
##     559     261     358
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
links = SNA %>% dplyr::select("sender","receiver") %>%
  dplyr::filter(sender!=receiver) %>%
  dplyr::group_by(sender,receiver) %>% dplyr::summarise(weight=length(sender))
head(links)
## # A tibble: 6 x 3
## # Groups:   sender [1]
##   sender     receiver     weight
##   <chr>      <chr>         <int>
## 1 ????? ???? 99percenter       2
## 2 ????? ???? AdamT            14
## 3 ????? ???? AliHajiSheik      1
## 4 ????? ???? Amigo             1
## 5 ????? ???? apdst             3
## 6 ????? ???? areafiftyone      1

We introduce igraph to import edgelist

library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
net = graph.data.frame(links, nodes, directed=T)
net
## IGRAPH c264ab8 DNW- 1178 43815 -- 
## + attr: name (v/c), leanR (v/c), weight (e/n)
## + edges from c264ab8 (vertex names):
##  [1] ????? ????->99percenter     ????? ????->AdamT          
##  [3] ????? ????->AliHajiSheik    ????? ????->Amigo          
##  [5] ????? ????->apdst           ????? ????->areafiftyone   
##  [7] ????? ????->Aunt Spiker     ????? ????->Awesome!       
##  [9] ????? ????->beerftw         ????? ????->Bigfoot 88     
## [11] ????? ????->Billy the Kid   ????? ????->Binky          
## [13] ????? ????->Blue_State      ????? ????->Bobcat         
## [15] ????? ????->Boo Radley      ????? ????->Born Free      
## + ... omitted several edges
E(net) # The edges of the "net" object
## + 43815/43815 edges from c264ab8 (vertex names):
##  [1] ????? ????->99percenter     ????? ????->AdamT          
##  [3] ????? ????->AliHajiSheik    ????? ????->Amigo          
##  [5] ????? ????->apdst           ????? ????->areafiftyone   
##  [7] ????? ????->Aunt Spiker     ????? ????->Awesome!       
##  [9] ????? ????->beerftw         ????? ????->Bigfoot 88     
## [11] ????? ????->Billy the Kid   ????? ????->Binky          
## [13] ????? ????->Blue_State      ????? ????->Bobcat         
## [15] ????? ????->Boo Radley      ????? ????->Born Free      
## [17] ????? ????->Bronson         ????? ????->Caine          
## [19] ????? ????->Camlon          ????? ????->Captain America
## + ... omitted several edges
V(net) # The vertices of the "net" object
## + 1178/1178 vertices, named, from c264ab8:
##    [1] Gipper               Redress              soccerboy22         
##    [4] jasonxe              washunut             Catawba             
##    [7] TOJ                  Cecil James          _Markum_            
##   [10] Councilman           cpwill               cAPSLOCK            
##   [13] Ikari                Gina                 pbrauer             
##   [16] nota bene            Boo Radley           randel              
##   [19] Hugh_Akston          upsideguy            dixiesolutions      
##   [22] American             Top Cat              lpast               
##   [25] Risky Thicket        Crosscheck           Temporal            
##   [28] ChuckBerry           Fiddytree            Voltaire X          
## + ... omitted several vertices
hist(E(net)$weight) # Edge weight

table(V(net)$leanR) # Vertex attribute ideology
## 
## central    left   right 
##     559     261     358

We can also import matrix

mx = as.matrix(get.adjacency(net))
net2 = graph.adjacency(mx)
net2
## IGRAPH c2deb81 DN-- 1178 43815 -- 
## + attr: name (v/c)
## + edges from c2deb81 (vertex names):
##  [1] Gipper ->Redress      Gipper ->cpwill       Gipper ->Ikari       
##  [4] Gipper ->disneydude   Gipper ->poweRob      Gipper ->Josie       
##  [7] Gipper ->Kandahar     Gipper ->votewho2012  Gipper ->ThePlayDrive
## [10] Gipper ->sharon       Gipper ->Grim17       Gipper ->Travis007   
## [13] Gipper ->Carleen      Redress->Gipper       Redress->soccerboy22 
## [16] Redress->jasonxe      Redress->washunut     Redress->Catawba     
## [19] Redress->Councilman   Redress->cpwill       Redress->cAPSLOCK    
## [22] Redress->Ikari        Redress->pbrauer      Redress->nota bene   
## + ... omitted several edges

1.2. Individual level indicators

dc = degree(net)
head(sort(dc,decreasing = T))
##        AdamT      Redress   TurtleDude      poweRob       cpwill Conservative 
##          994          756          739          701          659          646
dgr = data.frame(ideology=V(net)$leanR,out_degree=degree(net,mode="out"),in_degree=degree(net,mode="in"))
head(dgr)
##             ideology out_degree in_degree
## Gipper         right         13        11
## Redress         left        331       425
## soccerboy22    right        114        90
## jasonxe        right        139       160
## washunut        left        127       135
## Catawba         left        229       189

OK, let’s do some basic calculations:

library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
cor.test(dgr$out_degree,dgr$in_degree,method='spearman')
## Warning in cor.test.default(dgr$out_degree, dgr$in_degree, method = "spearman"):
## Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  dgr$out_degree and dgr$in_degree
## S = 26341425, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9033159
plyr::ddply(dgr,.(ideology),summarise,mean_d = mean(out_degree),median_d = median(out_degree),median_ind=median(in_degree))
##   ideology   mean_d median_d median_ind
## 1  central 25.37209      5.0          5
## 2     left 48.31418     14.0         16
## 3    right 47.54749     15.5         15
summary(closeness(net))
## Warning in closeness(net): At centrality.c:2617 :closeness centrality is not
## well-defined for disconnected graphs
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 7.212e-07 4.406e-06 4.414e-06 4.274e-06 4.419e-06 4.432e-06
summary(betweenness(net))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.00     0.47    59.30  1667.29  1315.57 43034.95

1.3. Network level indicators

vcount(net) # 1178 nodes
## [1] 1178
ecount(net) # 43815 unique edges
## [1] 43815
plot(degree_distribution(net)) # degree distribution

mean(degree(net)) # mean degree
## [1] 74.38879
median(degree(net))
## [1] 19
graph.density(net) #0.03160102
## [1] 0.03160102
reciprocity(net) #0.6180075
## [1] 0.6180075
transitivity(net)
## [1] 0.3611749
triad.census(net)
##  [1] 240709063  15447217  11664348    232894    317637    381020    823100
##  [8]    778741     48772      6575    777241     49994     60153     71180
## [15]    241857    145384

1.4. Community structure

cpn=clusters(net)
#cpn
cmt=fastgreedy.community(as.undirected(net))
modularity(cmt)
## [1] 0.1758044
#cmt$membership
table(cmt$membership,V(net)$leanR)
##     
##      central left right
##   1      235  111   157
##   2      206   87   132
##   3      108   60    67
##   4        1    1     0
##   5        1    0     0
##   6        0    1     0
##   7        0    0     1
##   8        0    0     1
##   9        1    0     0
##   10       1    0     0
##   11       1    0     0
##   12       0    1     0
##   13       1    0     0
##   14       1    0     0
##   15       1    0     0
##   16       1    0     0
##   17       1    0     0

1.5. Coreness/Centralization

cores = coreness(net,mode="all")
head(sort(cores,decreasing = T))
##  Redress washunut  Catawba   cpwill    Ikari  pbrauer 
##      122      122      122      122      122      122
centr_degree(net)$centralization
## [1] 0.3909909
centr_betw(net)$centralization
## [1] 0.0708887

Any relationship between coreness and centralities?

tempd = data.frame(cores,degree(net),closeness(net),betweenness(net))
## Warning in closeness(net): At centrality.c:2617 :closeness centrality is not
## well-defined for disconnected graphs
cor(tempd)
##                      cores degree.net. closeness.net. betweenness.net.
## cores            1.0000000   0.8324234     0.15837240       0.66506457
## degree.net.      0.8324234   1.0000000     0.11394261       0.93819420
## closeness.net.   0.1583724   0.1139426     1.00000000       0.08676795
## betweenness.net. 0.6650646   0.9381942     0.08676795       1.00000000

1.6. Data viz

Please refer to http://kateto.net/network-visualization

2. Network Formation Models

R igraph is good for descriptive statistics, whereas we need to switch to SNA for more advanced statistical models.

library(sna,quietly = T)
## 
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
## 
##     order
## network: Classes for Relational Data
## Version 1.16.0 created on 2019-11-30.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.
## 
## Attaching package: 'network'
## The following object is masked from 'package:plyr':
## 
##     is.discrete
## The following objects are masked from 'package:igraph':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
##     get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
##     is.directed, list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
## sna: Tools for Social Network Analysis
## Version 2.5 created on 2019-12-09.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.
## 
## Attaching package: 'sna'
## The following objects are masked from 'package:igraph':
## 
##     betweenness, bonpow, closeness, components, degree, dyad.census,
##     evcent, hierarchy, is.connected, neighborhood, triad.census

2.1. QAP

The input for QAP should be in matrix format:

dv = as.matrix(get.adjacency(net))

Create a match(similarity) matrix based on the node attributes (ideology):

head(nodes)
##         sender leanR
## 1       Gipper right
## 2      Redress  left
## 3  soccerboy22 right
## 8      jasonxe right
## 9     washunut  left
## 12     Catawba  left
mm = outer(nodes$leanR, nodes$leanR, function(a, b) as.integer(a == b))
diag(mm) = 0
dimnames(mm) = list(nodes$sender,nodes$sender)
mm[1:5,1:5]
##             Gipper Redress soccerboy22 jasonxe washunut
## Gipper           0       0           1       1        0
## Redress          0       0           0       0        1
## soccerboy22      1       0           0       1        0
## jasonxe          1       0           1       0        0
## washunut         0       1           0       0        0

Now, we can do the qaptest:

gcor(dv,mm)
## [1] -0.02610566

Test the correlation using qaptest: qaptest(A.List.Of.Networks, Some.Network.Function, reps=100), where reps is the number of graphs to be generated.

Out of a 100 permutations, how many had >= or <= correlation than the observed value? (the parameters g1=1 g2=2 tell the test to compare the first & second element of the list of networks that is the first argument of qaptest)

reply.qap <- qaptest(list(dv,mm), gcor, g1=1, g2=2, reps=100)
reply.qap
## 
## QAP Test Results
## 
## Estimated p-values:
##  p(f(perm) >= f(d)): 1 
##  p(f(perm) <= f(d)): 0
plot(reply.qap)

If we have multiple IVs (very slow):

IVs = rgraph(nrow(dv), 3) 
IVs[1,,] = mm

net.mod = netlm(dv, IVs, reps=100)
summary(net.mod)
## 
## OLS Network Model
## 
## Residuals:
##          0%         25%         50%         75%        100% 
## -0.03520397 -0.03513804 -0.03493889 -0.02565836  0.97454079 
## 
## Coefficients:
##             Estimate      Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept)  0.0350048147 1.00    0.00    0.00     
## x1          -0.0094796840 0.00    1.00    0.00     
## x2           0.0001991514 0.71    0.29    0.45     
## x3          -0.0000659224 0.39    0.61    0.80     
## 
## Residual standard error: 0.1749 on 1386502 degrees of freedom
## Multiple R-squared: 0.0006819    Adjusted R-squared: 0.0006797 
## F-statistic: 315.4 on 3 and 1386502 degrees of freedom, p-value:     0 
## 
## 
## Test Diagnostics:
## 
##  Null Hypothesis: qap 
##  Replications: 100 
##  Coefficient Distribution Summary:
## 
##        (intercept)        x1        x2        x3
## Min       34.51931 -10.42300  -2.43416  -2.13568
## 1stQ      36.91664  -2.94158  -0.50241  -0.67767
## Median    38.28032  -0.41626   0.20988  -0.07456
## Mean      38.47818  -0.12706   0.12135   0.02330
## 3rdQ      39.88739   2.08303   0.74799   0.75868
## Max       42.75128  16.03287   3.66323   2.24113

2.2(a). ERGM

Outline; 1) ERGM creation 2) MCMC diagnostics 3) ERGM model simulation 4) Model comparison 5) ERGM performance improvements

# load the "ergm" library
library(ergm)
## Warning: package 'ergm' was built under R version 3.6.3
## 
## ergm: version 3.10.4, created on 2019-06-10
## Copyright (c) 2019, Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Carter T. Butts, University of California -- Irvine
##                     Steven M. Goodreau, University of Washington
##                     Pavel N. Krivitsky, University of Wollongong
##                     Martina Morris, University of Washington
##                     with contributions from
##                     Li Wang
##                     Kirk Li, University of Washington
##                     Skye Bender-deMoll, University of Washington
##                     Chad Klumb
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
## NOTE: Versions before 3.6.1 had a bug in the implementation of the bd()
## constriant which distorted the sampled distribution somewhat. In
## addition, Sampson's Monks datasets had mislabeled vertices. See the
## NEWS and the documentation for more details.
## NOTE: Some common term arguments pertaining to vertex attribute and
## level selection have changed in 3.10.0. See terms help for more
## details. Use 'options(ergm.term=list(version="3.9.4"))' to use old
## behavior.
library(sna)

# Specify the network we'll call net - where dyads 
# are the unit of analysis...
net.ergm = network(links[,1:2])

list.vertex.attributes(net.ergm)
## [1] "na"           "vertex.names"
v.names = get.vertex.attribute(net.ergm,'vertex.names')

Reorder the attribute value and assign to network object:

x = match(v.names,nodes$sender)
ideology = nodes$leanR[x]
# Assign node-level attributes to actors in "net"
net.ergm %v% "ideology" <- ideology

Estimate the models:

# ERGM
summary(net.ergm~edges+mutual+triangle) #43815 edges+ 2548501 triads
##    edges   mutual triangle 
##    43815    13539  2548501
m = ergm(net.ergm ~ edges + mutual + triangle + nodefactor("ideology") + nodematch("ideology"),burnin=1500,MCMCsamplesize=3000,verbose=FALSE)
## Warning: `set_attrs()` is deprecated as of rlang 0.3.0
## This warning is displayed once per session.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 20:
## Optimizing with step length 0.314557452250294.
## The log-likelihood improved by 5.31.
## Iteration 2 of at most 20:
## Optimizing with step length 0.00844294975794158.
## The log-likelihood improved by 1.788.
## Iteration 3 of at most 20:
## Optimizing with step length 0.012753892866551.
## The log-likelihood improved by 1.643.
## Iteration 4 of at most 20:
## Optimizing with step length 0.00455401546609292.
## The log-likelihood improved by 1.311.
## Iteration 5 of at most 20:
## Optimizing with step length 0.00668352555036866.
## The log-likelihood improved by 1.989.
## Iteration 6 of at most 20:
## Optimizing with step length 0.00672115774255987.
## The log-likelihood improved by 1.71.
## Iteration 7 of at most 20:
## Optimizing with step length 0.00970910333697182.
## The log-likelihood improved by 1.851.
## Iteration 8 of at most 20:
## Optimizing with step length 0.00752783507213009.
## The log-likelihood improved by 1.794.
## Iteration 9 of at most 20:
## Optimizing with step length 0.00898130232912285.
## The log-likelihood improved by 2.017.
## Iteration 10 of at most 20:
## Optimizing with step length 0.00676213255308854.
## The log-likelihood improved by 1.568.
## Iteration 11 of at most 20:
## Optimizing with step length 0.00597566550843117.
## The log-likelihood improved by 1.022.
## Iteration 12 of at most 20:
## Optimizing with step length 0.00819934668333498.
## The log-likelihood improved by 1.856.
## Iteration 13 of at most 20:
## Optimizing with step length 0.0052487410010201.
## The log-likelihood improved by 1.535.
## Iteration 14 of at most 20:
## Optimizing with step length 0.0119039127279683.
## The log-likelihood improved by 1.709.
## Iteration 15 of at most 20:
## Optimizing with step length 0.0045436721200531.
## The log-likelihood improved by 1.428.
## Iteration 16 of at most 20:
## Optimizing with step length 0.00742594217571554.
## The log-likelihood improved by 1.448.
## Iteration 17 of at most 20:
## Optimizing with step length 0.00673435469681758.
## The log-likelihood improved by 1.592.
## Iteration 18 of at most 20:
## Optimizing with step length 0.00522838822983726.
## The log-likelihood improved by 1.204.
## Iteration 19 of at most 20:
## Optimizing with step length 0.00669540753129995.
## The log-likelihood improved by 3.27.
## Iteration 20 of at most 20:
## Optimizing with step length 0.0156837970452019.
## The log-likelihood improved by 1.392.
## MCMLE estimation did not converge after 20 iterations. The estimated coefficients may not be accurate. Estimation may be resumed by passing the coefficients as initial values; see 'init' under ?control.ergm for details.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC.  To examine model diagnostics and check
## for degeneracy, use the mcmc.diagnostics() function.
summary(m)
## 
## ==========================
## Summary of model fit
## ==========================
## 
## Formula:   net.ergm ~ edges + mutual + triangle + nodefactor("ideology") + 
##     nodematch("ideology")
## 
## Iterations:  20 out of 20 
## 
## Monte Carlo MLE Results:
##                             Estimate Std. Error MCMC %  z value Pr(>|z|)    
## edges                     -5.0102167  0.0229265      4 -218.534   <1e-04 ***
## mutual                     4.1573093  0.0472189      7   88.043   <1e-04 ***
## triangle                   0.0085276  0.0001142      8   74.696   <1e-04 ***
## nodefactor.ideology.left   0.2737991  0.0191066      4   14.330   <1e-04 ***
## nodefactor.ideology.right  0.2694375  0.0167571      5   16.079   <1e-04 ***
## nodematch.ideology        -0.1980415  0.0249779      3   -7.929   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 1879898  on 1356060  degrees of freedom
##  Residual Deviance:  357831  on 1356054  degrees of freedom
##  
## AIC: 357843    BIC: 357916    (Smaller is better.)

The ERGM runs by an MCMC process with multiple starts, and this helps you see if the model is converging. If the estimated coefficient values were to change dramatically, it might be a sign of a poorly specified model). You should see the log-likelihood increase with each iteration. You will see a loop warning. You can ignore this for now. Before trying to interpret the data, it is a good idea to check the MCMC process. Also see link.

mcmc.diagnostics(m)
## Sample statistics summary:
## 
## Iterations = 16384:1063936
## Thinning interval = 1024 
## Number of chains = 1 
## Sample size per chain = 1024 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                              Mean       SD Naive SE Time-series SE
## edges                        9632   110.53   3.4539          39.85
## mutual                       2143    31.07   0.9711          17.19
## triangle                  -246102 21379.60 668.1124       17145.00
## nodefactor.ideology.left     5833    96.87   3.0273          38.60
## nodefactor.ideology.right    7440   145.47   4.5458          64.16
## nodematch.ideology           2311    77.19   2.4122          32.46
## 
## 2. Quantiles for each variable:
## 
##                              2.5%     25%     50%     75%   97.5%
## edges                        9466    9533    9615    9728    9839
## mutual                       2083    2121    2137    2176    2192
## triangle                  -281800 -264556 -241631 -228353 -211022
## nodefactor.ideology.left     5639    5763    5848    5905    5991
## nodefactor.ideology.right    7125    7374    7475    7527    7696
## nodematch.ideology           2188    2249    2301    2383    2441
## 
## 
## Sample statistics cross-correlations:
##                                edges     mutual    triangle
## edges                      1.0000000 0.61910070 -0.15850543
## mutual                     0.6191007 1.00000000  0.05388754
## triangle                  -0.1585054 0.05388754  1.00000000
## nodefactor.ideology.left   0.4890717 0.29480070 -0.71278415
## nodefactor.ideology.right  0.3512321 0.25148715  0.72887401
## nodematch.ideology         0.5204835 0.19193232 -0.69193787
##                           nodefactor.ideology.left nodefactor.ideology.right
## edges                                    0.4890717                 0.3512321
## mutual                                   0.2948007                 0.2514872
## triangle                                -0.7127841                 0.7288740
## nodefactor.ideology.left                 1.0000000                -0.4077812
## nodefactor.ideology.right               -0.4077812                 1.0000000
## nodematch.ideology                       0.6329934                -0.4146437
##                           nodematch.ideology
## edges                              0.5204835
## mutual                             0.1919323
## triangle                          -0.6919379
## nodefactor.ideology.left           0.6329934
## nodefactor.ideology.right         -0.4146437
## nodematch.ideology                 1.0000000
## 
## Sample statistics auto-correlation:
## Chain 1 
##              edges    mutual  triangle nodefactor.ideology.left
## Lag 0    1.0000000 1.0000000 1.0000000                1.0000000
## Lag 1024 0.9850755 0.9936347 0.9969646                0.9877589
## Lag 2048 0.9695266 0.9875246 0.9939356                0.9757808
## Lag 3072 0.9544138 0.9809104 0.9908775                0.9640929
## Lag 4096 0.9392957 0.9744916 0.9879080                0.9526487
## Lag 5120 0.9243871 0.9678825 0.9849298                0.9414436
##          nodefactor.ideology.right nodematch.ideology
## Lag 0                    1.0000000          1.0000000
## Lag 1024                 0.9900008          0.9890087
## Lag 2048                 0.9793902          0.9777011
## Lag 3072                 0.9680535          0.9665186
## Lag 4096                 0.9564845          0.9557654
## Lag 5120                 0.9453303          0.9455043
## 
## Sample statistics burn-in diagnostic (Geweke):
## Warning in approx.hotelling.diff.test(x1, x2, var.equal = TRUE): Effective
## degrees of freedom (1.96032671967322) must exceed the number of varying
## parameters (6). P-value will not be computed.
## Chain 1 
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##                     edges                    mutual                  triangle 
##                   -0.8513                   -2.7720                   -6.1644 
##  nodefactor.ideology.left nodefactor.ideology.right        nodematch.ideology 
##                    4.4522                   -8.7040                    7.3871 
## 
## Individual P-values (lower = worse):
##                     edges                    mutual                  triangle 
##              3.946145e-01              5.571473e-03              7.073640e-10 
##  nodefactor.ideology.left nodefactor.ideology.right        nodematch.ideology 
##              8.499431e-06              3.204979e-18              1.500988e-13 
## Joint P-value (lower = worse):  0 .

## 
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).

You will see several plots and output. The plots to the right should look approximately normal. The output tells us three things of interest: 1) The accuracy of the model (r) 2) If we used a sufficiently large burn-in 3) If we used a sufficiently large sample in the simulation

In our case the samples might be too small. This doesn’t mean the results of the ERGM results are wrong, but we should take care in specifying the sample size. Let’s look at the summary of the results. We could create a new object that is the summary score info, but here we’ll just send it to the screen.

Let’s assess the model

summary(m)
## 
## ==========================
## Summary of model fit
## ==========================
## 
## Formula:   net.ergm ~ edges + mutual + triangle + nodefactor("ideology") + 
##     nodematch("ideology")
## 
## Iterations:  20 out of 20 
## 
## Monte Carlo MLE Results:
##                             Estimate Std. Error MCMC %  z value Pr(>|z|)    
## edges                     -5.0102167  0.0229265      4 -218.534   <1e-04 ***
## mutual                     4.1573093  0.0472189      7   88.043   <1e-04 ***
## triangle                   0.0085276  0.0001142      8   74.696   <1e-04 ***
## nodefactor.ideology.left   0.2737991  0.0191066      4   14.330   <1e-04 ***
## nodefactor.ideology.right  0.2694375  0.0167571      5   16.079   <1e-04 ***
## nodematch.ideology        -0.1980415  0.0249779      3   -7.929   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 1879898  on 1356060  degrees of freedom
##  Residual Deviance:  357831  on 1356054  degrees of freedom
##  
## AIC: 357843    BIC: 357916    (Smaller is better.)
lapply(m[1],exp)
## $coef
##                     edges                    mutual                  triangle 
##               0.006669458              63.899354193               1.008564070 
##  nodefactor.ideology.left nodefactor.ideology.right        nodematch.ideology 
##               1.314950617               1.309227823               0.820335830

The first section gives the model (formula) estimated in the ERGM. Here we said that the network was a function of edges, mutual ties and matching with respect ideology. Another way to think about this, is that we’re generating random networks that match the observed network with respect to the number of edges, the number of mutual dyads, the number of ties within/between ideology.

The second section tells how many iterations were done. The MCMC sample size is the number of random networks generated in the production of the estimate.

The next section gives the coefficients, their SEs and pValues. These are on a log-odds scale, so we interpret them like logit coefficients.

An edges value of -4.90694 means that the odds of seeing a tie on any dyad are exp(-4.90694) = 0.007395083, which could be thought of as density net of the other factors in the model. If you only have ‘edges’ in the model, then exp(b1) should be very close to the observed density. Edges are equivalent to a model intercept – while possible, I can’t imagine why one would estimate a model without an edges parameter.

A mutual value of 4.08642 means that reciprocity is more common than expected by chance (positive and significant), and here we see that exp(4.08642)=59.526674874, so it’s much more likely than chance – we are 60 times as likely to see a tie from ij if ji than if j did not nominate i.

We are exp(-0.22823)=0.795942220 times likely to nominate within ideology than across ideology.

The final section refers to overall model fit and MCMC diagnostic statistics (AIC, BIC).

fit = gof(m ~ odegree + idegree + espartners + distance)
#fit
plot(fit)

2.2(b). Valued ERGM

See link

2.2(c). TERGM

2.3. LSM: Latent Space Model

library(eigenmodel) #symmetric relational data
mtx = as.matrix(get.adjacency(as.undirected(net),sparse=FALSE,type='both'))
lsm.fit1 <- eigenmodel_mcmc(Y=mtx, R=2,S=150,burn=100)
## 5 percent done (iteration 13) Fri Mar 06 15:21:16 2020
## 10 percent done (iteration 25) Fri Mar 06 15:21:25 2020
## 15 percent done (iteration 38) Fri Mar 06 15:21:34 2020
## 20 percent done (iteration 50) Fri Mar 06 15:21:42 2020
## 25 percent done (iteration 63) Fri Mar 06 15:21:52 2020
## 30 percent done (iteration 75) Fri Mar 06 15:22:01 2020
## 35 percent done (iteration 88) Fri Mar 06 15:22:10 2020
## 40 percent done (iteration 100) Fri Mar 06 15:22:19 2020
## 45 percent done (iteration 113) Fri Mar 06 15:22:29 2020
## 50 percent done (iteration 125) Fri Mar 06 15:22:38 2020
## 55 percent done (iteration 138) Fri Mar 06 15:22:49 2020
## 60 percent done (iteration 150) Fri Mar 06 15:22:59 2020
## 65 percent done (iteration 163) Fri Mar 06 15:23:09 2020
## 70 percent done (iteration 175) Fri Mar 06 15:23:19 2020
## 75 percent done (iteration 188) Fri Mar 06 15:23:29 2020
## 80 percent done (iteration 200) Fri Mar 06 15:23:39 2020
## 85 percent done (iteration 213) Fri Mar 06 15:23:50 2020
## 90 percent done (iteration 225) Fri Mar 06 15:23:59 2020
## 95 percent done (iteration 238) Fri Mar 06 15:24:09 2020
## 100 percent done (iteration 250) Fri Mar 06 15:24:19 2020
vec = eigen(lsm.fit1$ULU_postmean)$vec[, 1:2]
plot(lsm.fit1)

IVs = array(mm,dim=c(1178,1178,1))
lsm.fit2 <- eigenmodel_mcmc(Y=mtx, X=IVs, R=2,S=150,burn=20)
## 5 percent done (iteration 9) Fri Mar 06 15:24:35 2020
## 10 percent done (iteration 17) Fri Mar 06 15:24:40 2020
## 15 percent done (iteration 26) Fri Mar 06 15:24:47 2020
## 20 percent done (iteration 34) Fri Mar 06 15:24:54 2020
## 25 percent done (iteration 43) Fri Mar 06 15:25:01 2020
## 30 percent done (iteration 51) Fri Mar 06 15:25:08 2020
## 35 percent done (iteration 60) Fri Mar 06 15:25:16 2020
## 40 percent done (iteration 68) Fri Mar 06 15:25:23 2020
## 45 percent done (iteration 77) Fri Mar 06 15:25:30 2020
## 50 percent done (iteration 85) Fri Mar 06 15:25:36 2020
## 55 percent done (iteration 94) Fri Mar 06 15:25:45 2020
## 60 percent done (iteration 102) Fri Mar 06 15:25:52 2020
## 65 percent done (iteration 111) Fri Mar 06 15:25:59 2020
## 70 percent done (iteration 119) Fri Mar 06 15:26:06 2020
## 75 percent done (iteration 128) Fri Mar 06 15:26:13 2020
## 80 percent done (iteration 136) Fri Mar 06 15:26:19 2020
## 85 percent done (iteration 145) Fri Mar 06 15:26:27 2020
## 90 percent done (iteration 153) Fri Mar 06 15:26:33 2020
## 95 percent done (iteration 162) Fri Mar 06 15:26:40 2020
## 100 percent done (iteration 170) Fri Mar 06 15:26:47 2020
plot(lsm.fit2)