if (FALSE) {
## This example takes some time to run!
summary(House2001)
## Put the votes in a matrix, and discard members with too many NAs etc:
House2001m <- as.matrix(House2001[-1])
informative <- apply(House2001m, 1, function(row){
valid <- !is.na(row)
validSum <- if (any(valid)) sum(row[valid]) else 0
nValid <- sum(valid)
uninformative <- (validSum == nValid) || (validSum == 0) || (nValid < 10)
!uninformative})
House2001m <- House2001m[informative, ]
## Make a vector of colours, blue for Republican and red for Democrat:
parties <- House2001$party[informative]
partyColors <- rep("black", length(parties))
partyColors <- ifelse(parties == "D", "red", partyColors)
partyColors <- ifelse(parties == "R", "blue", partyColors)
## Expand the data for statistical modelling:
House2001v <- as.vector(House2001m)
House2001f <- data.frame(member = rownames(House2001m),
party = parties,
rollCall = factor(rep((1:20),
rep(nrow(House2001m), 20))),
vote = House2001v)
## Now fit an "empty" model, in which all members vote identically:
baseModel <- glm(vote ~ -1 + rollCall, family = binomial, data = House2001f)
## From this, get starting values for a one-dimensional multiplicative term:
Start <- residSVD(baseModel, rollCall, member)
##
## Now fit the logistic model with one multiplicative term.
## For the response variable, instead of vote=0,1 we use 0.03 and 0.97,
## corresponding approximately to a bias-reducing adjustment of p/(2n),
## where p is the number of parameters and n the number of observations.
##
voteAdj <- 0.5 + 0.94*(House2001f$vote - 0.5)
House2001model1 <- gnm(voteAdj ~ Mult(rollCall, member),
eliminate = rollCall,
family = binomial, data = House2001f,
na.action = na.exclude, trace = TRUE, tolerance = 1e-03,
start = -Start)
## Deviance is 2234.847, df = 5574
##
## Plot the members' positions as estimated in the model:
##
memberParameters <- pickCoef(House2001model1, "member")
plot(coef(House2001model1)[memberParameters], col = partyColors,
xlab = "Alphabetical index (Abercrombie 1 to Young 301)",
ylab = "Member's relative position, one-dimensional model")
## Can do the same thing with two dimensions, but gnm takes around 40
## slow iterations to converge (there are more than 600 parameters):
Start2 <- residSVD(baseModel, rollCall, member, d = 2)
House2001model2 <- gnm(
voteAdj ~ instances(Mult(rollCall - 1, member - 1), 2),
eliminate = rollCall,
family = binomial, data = House2001f,
na.action = na.exclude, trace = TRUE, tolerance = 1e-03,
start = Start2, lsMethod = "qr")
## Deviance is 1545.166, df = 5257
##
memberParameters1 <- pickCoef(House2001model2, "1).member")
memberParameters2 <- pickCoef(House2001model2, "2).member")
plot(coef(House2001model2)[memberParameters1],
coef(House2001model2)[memberParameters2],
col = partyColors,
xlab = "Dimension 1",
ylab = "Dimension 2",
main = "House2001 data: Member positions, 2-dimensional model")
##
## The second dimension is mainly due to rollCall 12, which does not
## correlate well with the rest -- look at the coefficients of
## House2001model1, or at the 12th row of
cormat <- cor(na.omit(House2001m))
}
Run the code above in your browser using DataLab