x <- 1 #Preference: primary method
x = 1 #Preference: within functions
1 -> x #Preference: never
ls()
Cheburashka <- "a cartoon character"
Cheburashka
cheburashka
orig.folder <- getwd()
orig.folder
dir.create("new.folder")
list.files()
setwd("new.folder")
list.files()
setwd(orig.folder)
file.remove("new.folder")
list.files()
install.packages("igraph")
library(igraph)
detach("package:igraph", unload=TRUE)
x <- NA
NA # Non-applicable, missing
NULL # Undefined or empty value
NaN # Not a Number
Inf # Infinite
is.na(x)
is.null(x)
is.nan(x)
is.infinite(x)
is.numeric(42)
is.numeric("42")
is.numeric(3.12)
is.logical(T)
is.logical(FALSE)
is.character(FALSE)
is.character("42")
is.character("HSE Network Workshop")
as.numeric("42") + 1
as.character(as.numeric("0") + 1)
as.character(!as.logical("1"))
as.character(as.logical(as.numeric("0") + 1))
as.character(is.logical(as.logical(as.numeric("0")) + 1))
as.character(!as.logical(as.numeric("1")))
RU.cit <- c("Mos", "StP", "Nov", "Yek") # Create vector RU.cit <- c(RU.cit, "Niz") # Add element RU.cit[3] # Retrieve element 3
RU.pops <- c(10.4, 4.6, 1.4, 1.3) # Create vector names(RU.pops) <- RU.cit[c(1, 2, 3, 4)] # Name vector RU.pops[c(4,2,3,1)] # Permute the order and retrieve length(RU.pops) # Retrieve the length
RU.vis <- c(TRUE, TRUE, FALSE, FALSE) #Create vector RU.vis[-c(4, 2)] # Remove elements 4 and 2
all(1:5 == c(1, 2, 3, 4, 5))
all(1:5 == seq(from = 1, to = 5, by = 1))
rep(1, times = 5)
rep(NA, times = 4)
rep("Beetlejuice", times = 3)
RU.mat <- cbind(pop = RU.pops, vis = RU.vis)
rownames(RU.mat) <- RU.cit[c(1:4)]
colnames(RU.mat)
RU.mat <- matrix( , nrow = length(RU.pops), ncol = 2)
colnames(RU.mat) <- c("pop", "vis")
rownames(RU.mat) <- names(RU.pops)
RU.mat[ , "pop"] <- RU.pops
RU.mat[ , "vis"] <- RU.vis
RU.mat[1, ]
RU.mat[ , "pop"]
RU.mat[1, 2]
RU.mat[1, , ] # What does the error mean?
RU.mat[ , "vis"]
RU.vis
RU.df <- as.data.frame(RU.mat)
RU.df$vis <- as.logical(RU.df$vis)
RU.df$vis[2]
RU.df[2, "vis"]
RU.df[["vis"]][2]
RU.df[[2]][2]
RU.list <- list(RU.mat, RU.vis, RU.df)
names(RU.list) <- c("Mat", "Vec", "DF")
RU.list$DF$vis[2]
RU.list[["DF"]]$vis[2]
RU.list[[3]]$vis[2]
RU.list[["DF"]][["vis"]][2]
RU.list[["DF"]][2, "vis"]
RU.list[["DF"]][2, 2]
xTimesxMinus1 <- function(x)
return(x * (x-1))
xTimesxMinus1(42)
InvHypSineTrans <- function(x){
x.ret <- log(x + sqrt(x^2 + 1))
return(x.ret)
}
plot(x = 0:1000, InvHypSineTrans(0:1000), main = "Hyperbolic Sine Transformation", xlab = "x", ylab = "y")
library(compiler)
InvHypSineTrans.cmp <- cmpfun(InvHypSineTrans)
system.time(InvHypSineTrans(0:10000000))
system.time(InvHypSineTrans.cmp(0:10000000))
??"Chi Squared"
help.search("Chi Squared")
?chisq.test
chisq.test
lab.seq <- seq(from = -100, to = 100, by = 20)
?sapply
lab.mat <- sapply(lab.seq, rnorm, n = 100)
dim(lab.mat)
fix(lab.mat)
?apply
lab.means <- apply(lab.mat, 2, mean)
plot(x = lab.seq, y = lab.means)
test.val <- "It works!"
test.val2 <- paste(test.val, "Really")
source("test.val.source.txt")
ls()
print(test.val)
print(test.val2)
savehistory("Filename.RHistory")
loadhistory("Filename.RHistory")
save.image("Filename.RData")
load("Filename.RData")
library(igraph)
wikidat <- read.table(file = "http://pastebin.com/raw.php?i=UVmTznBj", sep = "\t", header = TRUE, skip = 6)
# 'sep' is the separator
# 'header' indicates column names
# 'skip' are the number of initial rows to ignore
wikidat <- graph.data.frame(d = wikidat, directed = TRUE)
summary(wikidat) # Nodes: 7115; Edges: 103689
library(igraph)
marvnet <- read.graph("http://bioinfo.uib.es/~joemiro/marvel/porgat.txt", format = "pajek")
summary(marvnet)
?read.graph
?graoh.data.frame
?graph.adjacency
?graph.adjlist
?graph.constructors
is.simple(wikidat) # Verify it's simple is.directed(wikidat) # Verify it's directed vcount(wikidat) # Number of vertices ecount(wikidat) # Number of edges graph.density(wikidat) # Density
V(wikidat)$indegree <- degree(wikidat, mode = "in") V(wikidat)$outdegree <- degree(wikidat, mode = "out") V(wikidat)$totaldegree <- degree(wikidat, mode = "total")
all.vatts <- list.vertex.attributes(wikidat)[-1] all.vatts <- sapply(all.vatts, get.vertex.attribute, graph = wikidat) summary(all.vatts)
hist(log(V(wikidat)$outdegree), main="Wiki Admin Votes", xlab="Outdegree (log)")
unlist(dyad.census(wikidat))
reciprocity(wikidat)
triad.census(wikidat)
transitivity(wikidat) # What does this number refer to?
V(wikidat)$loc.trans <- transitivity(wikidat, "local")
V(wikidat)$name[which.max(V(wikidat)$loc.trans)]
average.path.length(wikidat)
diameter(wikidat) V(wikidat)$name[(get.diameter(wikidat))] hist(shortest.paths(wikidat), main = "Histogram of Shortest Path Lengths", xlab = "Geodesic Lengths")
V(wikidat)$betw <- betweenness(wikidat)
E(wikidat)$eb <- edge.betweenness(wikidat)
hist(log(E(wikidat)$eb), main = "Histogram of Edge Betweenness", sub = "Wiki Admin Voting", xlab = "Edge Betweenness (log)")
#How many weak and strong components do we have? sapply(c("weak", "strong"), no.clusters, graph = wikidat)
#Notice the distributions clusters(wikidat, mode = "weak")$csize clusters(wikidat, mode = "strong")$csize #Assign membership as attributes
V(wikidat)$comp.w <- clusters(wikidat, mode = "weak")$membership V(wikidat)$comp.s <- clusters(wikidat, mode = "strong")$membership
# Are the vertexes part of the giant component? V(wikidat)$giantcomp.w <- V(wikidat)$comp.w == which.max(clusters(wikidat, mode = "weak")$csize) V(wikidat)$giantcomp.s <- V(wikidat)$comp.s == which.max(clusters(wikidat, mode = "strong")$csize)
V(wikidat)$closeness <- closeness(wikidat)
V(wikidat)$evcent <- evcent(wikidat)$vector
V(wikidat)$kc.undir <- graph.coreness(as.undirected(wikidat, mode = "collapse"))
#How are undirected k-cores related to centrality?
kc.cent.corr.fun<-function(x, y = V(wikidat)$kc.undir){
a <- get.vertex.attribute(wikidat, name = x)
b <- y
cor.est <- cor.test(a, b, method = "spearman", exact = FALSE)$estimate
return(cor.est)
}
kc.cent.corr.fun <- cmpfun(kc.cent.corr.fun)
cent.atts <- c("indegree", "outdegree", "totaldegree", "betw", "closeness", "evcent")
cent.cors <- sapply(cent.atts, kc.cent.corr.fun)
names(cent.cors) <- cent.atts
barplot(cent.cors, ylab = "Spearman's rho", xlab = "Centrality", main = "Correlation with k-coreness")
#Directed k-cores
sapply(c("in", "out", "all"), function(y) return(sapply(cent.atts, kc.cent.corr.fun, y = graph.coreness(wikidat, mode = y))))
rm(cent.atts, kc.cent.corr.fun)
wikidat.comms <- multilevel.community(as.undirected(wikidat, mode = "collapse")) wikidat.comms$modularity V(wikidat)$comms <- wikidat.comms$membership xtabs(~V(wikidat)$comms) # 3, 4, 5, 9, and 11 are the biggest
wikidat.comms.w <- multilevel.community(as.undirected(wikidat, mode = "collapse"), weights = max(E(wikidat)$eb) - E(wikidat)$eb) wikidat.comms.w$modularity # Slightly higher V(wikidat)$comms.w <- wikidat.comms.w$membership xtabs(~V(wikidat)$comms.w) # 4, 5, 14, and 15 are the biggest
save.image("HSE_Moscow_NetSchool.RData")
savehistory("HSE_Moscow_NetSchool.RHistory")
vcount(wikidat) # 7115 nodes
ecount(wikidat) # 103689 edges
plot(wikidat)
sampsize <- round(vcount(wikidat) * 0.05) # 5% sample
subsamp <- sample.int(vcount(wikidat), sampsize)
wikidat.sg <- induced.subgraph(wikidat, subsamp)
plot(wikidat.sg)
sampsize <- round(vcount(wikidat) * 0.05) # 5% sample
subsamp <- sample.int(vcount(wikidat), sampsize)
wikidat.sg <- induced.subgraph(wikidat, subsamp)
plot(wikidat.sg)
plot(wikidat.sg, vertex.size = 3, vertex.label = "")
subsamp <- which(degree(wikidat.sg, mode = "all") > 0)
wikidat.sg <- induced.subgraph(wikidat.sg, subsamp)
plot(wikidat.sg, vertex.size = 3, vertex.label = "", edge.arrow.size = 0.5)
# Sample based on betweenness and large communities cond1 <- V(wikidat)$betw > 1 cond2 <- V(wikidat)$comms %in% c(3, 4, 5, 9, 11) subsamp <- which(cond1 & cond2) wikidat.sg <- induced.subgraph(wikidat, subsamp) vcount(wikidat.sg) # From 7,115 to 1,372 nodes ecount(wikidat.sg) # From 103,689 to 40,587 edges # Still too big to see anything. # Systematic 25% sample on eigenvector centrality evrank <- rank(V(wikidat.sg)$evcent, "random") nsyssamp <- seq(from = 1, to = max(evrank), by = 4) nsyssamp <- which(evrank%in% nsyssamp) wikidat.sg <- induced.subgraph(wikidat.sg, nsyssamp)
# Systematic 25% sample on edge betweenness ebrank <- rank(E(wikidat.sg)$eb, "random") esyssamp <- seq(from = 1, to = max(ebrank), by = 4) esyssamp <- which(ebrank %in% esyssamp) wikidat.sg <- subgraph.edges(wikidat.sg, esyssamp) plot(wikidat.sg, vertex.size = 3, vertex.label = "", edge.arrow.size = 0.25)
?layout.auto
wikidat.sg.lo <- layout.fruchterman.reingold(wikidat.sg, params = list(niter = 2500, area = vcount(wikidat.sg)^3))
V(wikidat.sg)$x <- wikidat.sg.lo[,1]
V(wikidat.sg)$y <- wikidat.sg.lo[,2]
plot(wikidat.sg, vertex.size = 3, vertex.label = "", edge.arrow.size = 0.25)
ewidth <- ecdf(E(wikidat.sg)$eb)(E(wikidat.sg)$eb)
E(wikidat.sg)$width <- (1.25 * ewidth)^2
plot(wikidat.sg, vertex.size = 3, vertex.label = "", edge.arrow.size = 0.25, edge.curved = TRUE)
library(RColorBrewer )
vcols <- brewer.pal(3, "Accent")
names(vcols) <- unique(V(wikidat.sg)$comms)
V(wikidat.sg)$color <- vcols[as.character(V(wikidat.sg)$comms)]
V(wikidat.sg)$size <- 6 * ecdf(V(wikidat.sg)$evcent)(V(wikidat.sg)$evcent)
plot(wikidat.sg, vertex.label = "", edge.arrow.size = 0.25, edge.curved = TRUE)
ebrank <- rank(E(wikidat.sg)$eb, "random") esyssamp <- seq(from = 1, to = max(ebrank), by = 2) esyssamp <- which(ebrank %in% esyssamp) wikidat.sg <- subgraph.edges(wikidat.sg, esyssamp) plot(wikidat.sg, vertex.label = "", edge.arrow.size = 0.25, edge.curved = .25)
par(mfrow = c(2, 2), mar = c(0.1, 0.1, 0.1, 0.1))
plot(wikidat.sg, vertex.label = "", edge.arrow.size = 0.25, edge.curved = .25, layout = layout.kamada.kawai, margin = c(0, 0, 0, 0))
plot(wikidat.sg, vertex.label = "", edge.arrow.size = 0.25, edge.curved = .25, layout = layout.circle, margin = c(0, 0, 0, 0))
plot(wikidat.sg, vertex.label = "", edge.arrow.size = 0.25, edge.curved = .25, layout = layout.svd, margin = c(0, 0, 0, 0))
plot(wikidat.sg, vertex.label = "", edge.arrow.size = 0.25, edge.curved = .25, layout = layout.mds, margin = c(0, 0, 0, 0))
wikidat.sg.elist <- get.edgelist(wikidat.sg, names = FALSE)
edgecolor <- apply(wikidat.sg.elist, 1, function(x){
c1 <-V(wikidat.sg)$comms[x[1]]
c2 <- V(wikidat.sg)$comms[x[2]]
return(ifelse(c1 == c2, vcols[as.character(c1)], "darkgrey"))
})
E(wikidat.sg)$color <- edgecolor
plot(wikidat.sg, vertex.label = "", edge.arrow.size = 0.25, edge.curved = .25, layout = layout.svd, margin = c(0, 0, 0, 0))
?igraph.plotting
reciprocity(wikidat)
graph.order <- seq(from = 10, to = 100, by = 5)
graph.trans <- sapply(graph.order, function(x) replicate(1000, transitivity(erdos.renyi.game(x, .1))))
graph.apl <- sapply(graph.order, function(x) replicate(1000, average.path.length(erdos.renyi.game(x, .1))))
par(mfrow = c(1, 2))
plot(x = graph.order, y = apply(graph.trans, 2, mean), ylim = c(0, 1), xlim = c(0, 100), type = "l", xlab = "Order", ylab = "Transitivity", main = "Tie Prob = 0.10; Simulations=1000")
lines(x = graph.order, y = apply(graph.trans, 2, quantile, probs = .025, na.rm = TRUE), col = "red", lty = 2, type = "l")
lines(x = graph.order, y = apply(graph.trans, 2, quantile, probs = .975, na.rm = TRUE), col = "red", lty = 2, type = "l")
plot(x = graph.order, y = apply(graph.apl, 2, mean), ylim = c(0, 5), xlim = c(0, 100), type = "l", xlab = "Order", ylab = "Average Path Length", main = "Tie Prob = 0.10; Simulations = 1000")
lines(x = graph.order, y = apply(graph.apl, 2, quantile, probs = .025, na.rm = TRUE), col = "red", lty = 2, type = "l")
lines(x = graph.order, y = apply(graph.apl, 2, quantile, probs = .975, na.rm = TRUE), col = "red", lty = 2, type = "l")
graph.tieprob <- seq(from = .01, to = .30, by = .01)
graph.trans <- sapply(graph.tieprob, function(x) replicate(1000, transitivity(erdos.renyi.game(100, x))))
graph.apl <- sapply(graph.tieprob, function(x) replicate(1000, average.path.length(erdos.renyi.game(100, x))))
par(mfrow = c(1, 2))
plot(x = graph.tieprob, y = apply(graph.trans, 2, mean), ylim = c(0, 1), xlim = c(0, .3), type = "l", xlab = "Tie Probability", ylab = "Transitivity", main = "Order = 100; Simulations = 1000")
lines(x = graph.tieprob, y = apply(graph.trans, 2, quantile, probs = .025, na.rm = TRUE), col = "red", lty = 2, type = "l")
lines(x = graph.tieprob, y = apply(graph.trans, 2, quantile, probs = .975, na.rm = TRUE), col = "red", lty = 2, type = "l")
plot(x = graph.tieprob, y = apply(graph.apl, 2, mean), ylim = c(0,8), xlim = c(0,.3), type = "l", xlab = "Tie Probability", ylab = "Average Path Length", main = "Order=100; Simulations = 1000")
lines(x = graph.tieprob, y = apply(graph.apl, 2, quantile, probs = .025, na.rm = TRUE), col = "red", lty = 2, type = "l")
lines(x = graph.tieprob, y = apply(graph.apl, 2, quantile, probs = .975, na.rm = TRUE), col = "red", lty = 2, type = "l")
# Do conservative blogs have more links than liberal ones? polblogs <- nexus.get(id = "polblogs") #0 = liberal, 1 = conservative polblogs.totdeg <- degree(polblogs, mode = "all") obs.diff <- mean(polblogs.totdeg[V(polblogs)$LeftRight == 1]) - mean(polblogs.totdeg[V(polblogs)$LeftRight == 0]) perm.fun <- function(){ perm.attribs <- sample(V(polblogs)$LeftRight) return(mean(polblogs.totdeg[perm.attribs == 1]) - mean(polblogs.totdeg[perm.attribs == 0])) }; perm.fun<-cmpfun(perm.fun)
exp.diffs <- replicate(1000, perm.fun()) mean(obs.diff > exp.diffs) # ~0.86, not significantly greater than by chance assignment
e.i.index <- function(el, attrib){ in.ties <- sum(attrib[el[,1]] == attrib[el[ , 2]]) ex.ties <- sum(attrib[el[,1]] != attrib[el[ , 2]]) return((ex.ties - in.ties)/(ex.ties + in.ties)) }; e.i.index<-cmpfun(e.i.index)
polblogs.el <- get.edgelist(polblogs, names = FALSE) polblogs.attrib <- V(polblogs)$LeftRight obs.val <- e.i.index(polblogs.el, polblogs.attrib) exp.val <- replicate(1000, e.i.index(polblogs.el, sample(polblogs.attrib))) mean(obs.val < exp.val) # We expect fewer outgroup ties
karate <- nexus.get("karate")
karate.lo <- layout.kamada.kawai(karate, params = list(niter = 1000))
par(mfrow = c(1, 2))
plot(karate, vertex.size = 5, vertex.label = V(karate)$name, vertex.label.family = "sans", margin = c(0, 0, 0, 0), edge.curved = .33, layout = karate.lo)
karate.rw <- rewire(karate, mode = "simple", niter = 10*ecount(karate))
plot(karate.rw, vertex.size = 5, vertex.label = V(karate)$name, vertex.label.family = "sans", margin = c(0, 0, 0, 0), edge.curved = .33, layout = karate.lo)
obs.trans <- transitivity(karate)
karate.rw.fun <- function(){
krw <- rewire(karate, mode = "simple", niter = 10 * ecount(karate))
return(transitivity(krw))
}; karate.rw.fun <- cmpfun(karate.rw.fun)
exp.trans <- replicate(1000, karate.rw.fun())
mean(obs.trans > exp.trans) # ~0.87
A fixed number (or proportion) of edges are randomly assigned to a fixed set of nodes.
er.graph <- erdos.renyi.game(100, .03)
plot(er.graph, vertex.size = 5, vertex.label = "")
obs.incent <- centralization.degree(wikidat, mode = "in", loops = FALSE)$centralization
exp.incent.fun <- function(){
new.net <- erdos.renyi.game(vcount(wikidat), ecount(wikidat), type = "gnm", directed = TRUE)
return(centralization.degree(new.net, mode = "in", loops = FALSE)$centralization)
}; exp.incent.fun <- cmpfun(exp.incent.fun)
exp.incent <- replicate(1000, exp.incent.fun())
mean(obs.incent > exp.incent)
Networks grow according to the principle of preferential attachment.
wikidat.in <- degree(wikidat, mode = "in")
wikidat.out <- degree(wikidat, mode = "out")
wikidat.plfit.in <- power.law.fit(wikidat.in)
wikidat.plfit.out <- power.law.fit(wikidat.out)
print(c(wikidat.plfit.in$alpha, wikidat.plfit.out$alpha))
exp.incent.pl.fun <- function(){
new.net <- static.power.law.game(vcount(wikidat), ecount(wikidat), exponent.out = wikidat.plfit.out$alpha, exponent.in = wikidat.plfit.in$alpha)
return(centralization.degree(new.net, mode="in", loops = FALSE)$centralization)
}; exp.incent.pl.fun <- cmpfun(exp.incent.pl.fun)
exp.incent.pl <- replicate(1000, exp.incent.pl.fun())
mean(obs.incent > exp.incent.pl) # Obs more centralized
par(mfrow = c(1, 2))
hist(exp.incent, main = "E-R Indegree Centralization")
hist(exp.incent.pl, main = "Power Law Indegree Centralization") # ~10x more centralized graphs
Local neighborhoods bridged by a few actors with ties that jump across neighborhoods.
par(mfrow=c(1, 2))
g1 <- graph.lattice(100, nei = 1)# nei=mean degree / 2
g1.layout<-layout.fruchterman.reingold(g1)
V(g1)$x <- g1.layout[ , 1]; V(g1)$y <- g1.layout[ , 2]
g1.rewire <- rewire.edges(g1, p = 0.1) # prob of .1 rewire
plot(g1, vertex.size = 5, vertex.label = "", main = "Lattice")
plot(g1.rewire, vertex.size = 5, vertex.label = "", main = "Rewire")
What's the relationship between the neighborhood index (nei) and graph density?
#The following two plots should be approximately equal
dev.off(); par(mfrow = c(1, 2))
sw.net <- watts.strogatz.game(dim = 1, size = 100, nei = 1, p = .1)
plot(g1.rewire, vertex.size = 5, vertex.label = "", main = "Rewired Lattice")
plot(sw.net, vertex.size = 5, vertex.label = "", layout = g1.layout, main = "Watts-Strogatz")
par(mfrow = c(1, 2))
g2.ws <- watts.strogatz.game(dim = 1, size = 100, nei = 1, p = .9)
g2.er <- erdos.renyi.game(100, (2 * 100 / 2) / (100 * 99 / 2), directed = FALSE)
plot(g2.ws, vertex.size = 5, vertex.label = "", main = "Watts-Strogatz")
plot(g2.er, vertex.size = 5, vertex.label = "", main = "Erdős–Rényi")