# R code for: # "Grimpe, C. and R. Patuelli (2009). Regional Knowledge Production in Nanomaterials: A Spatial Filtering Approach. The Annals of Regional Science (forthcoming)" update.packages() options(digits = 4) library(dispmod) library(MASS) library(spdep) library(effects) library(lmtest) library(sandwich) library(AER) library(pscl) # reads data (data for dependent variable is proprietary, cannot be distributed) inp = read.table("prepare_cra.dat", header = TRUE) # further files being read omitted here... # definition of variables from dataframes omitted here... # can be substitute by calling up the dataframe directly into the regression command # histogram patents rrr<- hist(dep, breaks = 100, xlab = "Nanomaterial patents", ylab = "Number of German NUTS-3 districts", main = NULL)$breaks hist(dep, breaks = c(-1, rrr), xlab = "Nanomaterial patents", ylab = "Number of German NUTS-3 districts", main = NULL) # loads spatial weights matrix - here we start directly from a previously-generated binary rook contiguity matrix W = read.table("weights C rook.dat", header = FALSE) W = as.matrix(W) W.listw = mat2listw(W, row.names = NULL) W.listc = nb2listw(W.listw$neighbours, style = "C") # recreates matrix but standardized according to C-style W_C = nb2mat(W.listc$neighbours, style = "C") # overdispersion test (to show that regular Poisson is not appropriate) dep.pois <- glm(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lngdp00cub + lnpop00 + indsh + scish + mechsh + elecsh + chemsh + drugsh + factor(central_dummy) + factor(urban_dummy) + factor(aggl_dummy), family = poisson) dispersiontest(dep.pois) # check how many zeros in Poisson estimate (frequencies) rbind(obs = table(dep)[1:10], exp = round(sapply(0:9, function(x) sum(dpois(x, fitted(dep.pois)))))) # nb with all explanatory variables - basic non-spatial negative binomial estimation dep.nb = glm.nb(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh + scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy, maxit = 50) # residuals show spatial autocorrelation moran.test(residuals.glm(dep.nb), W.listc, zero.policy = FALSE, alternative = "two.sided") # check how many zeros in negbin estimate (frequencies) rbind(obs = table(dep)[1:10], exp = round(sapply(0:9, function(x) sum(dpois(x, fitted(dep.nb)))))) # reads eigenvectors from previously-prepared file eigenvecs = read.table("eigenvecs_rook_C-coding.dat", header = FALSE) e1 = eigenvecs[[2]]; e2 = eigenvecs[[3]]; e3 = eigenvecs[[4]]; e4 = eigenvecs[[5]]; e5 = eigenvecs[[6]]; e6 = eigenvecs[[7]]; e7 = eigenvecs[[8]] e8 = eigenvecs[[9]]; e9 = eigenvecs[[10]]; e10 = eigenvecs[[11]]; e11 = eigenvecs[[12]]; e12 = eigenvecs[[13]]; e13 = eigenvecs[[14]]; e14 = eigenvecs[[15]] e15 = eigenvecs[[16]]; e16 = eigenvecs[[17]]; e17 = eigenvecs[[18]]; e18 = eigenvecs[[19]]; e19 = eigenvecs[[20]]; e20 = eigenvecs[[21]]; e21 = eigenvecs[[22]] e22 = eigenvecs[[23]]; e23 = eigenvecs[[24]]; e24 = eigenvecs[[25]]; e25 = eigenvecs[[26]]; e26 = eigenvecs[[27]]; e27 = eigenvecs[[28]]; e28 = eigenvecs[[29]] e29 = eigenvecs[[30]]; e30 = eigenvecs[[31]]; e31 = eigenvecs[[32]]; e32 = eigenvecs[[33]]; e33 = eigenvecs[[34]]; e34 = eigenvecs[[35]]; e35 = eigenvecs[[36]] e36 = eigenvecs[[37]]; e37 = eigenvecs[[38]]; e38 = eigenvecs[[39]]; e39 = eigenvecs[[40]]; e40 = eigenvecs[[41]]; e41 = eigenvecs[[42]]; e42 = eigenvecs[[43]] e43 = eigenvecs[[44]]; e44 = eigenvecs[[45]]; e45 = eigenvecs[[46]]; e46 = eigenvecs[[47]]; e47 = eigenvecs[[48]]; e48 = eigenvecs[[49]]; e49 = eigenvecs[[50]] e50 = eigenvecs[[51]]; e51 = eigenvecs[[52]]; e52 = eigenvecs[[53]]; e53 = eigenvecs[[54]]; e54 = eigenvecs[[55]]; e55 = eigenvecs[[56]]; e56 = eigenvecs[[57]] e57 = eigenvecs[[58]]; e58 = eigenvecs[[59]]; e59 = eigenvecs[[60]]; e60 = eigenvecs[[61]]; e61 = eigenvecs[[62]]; e62 = eigenvecs[[63]]; e63 = eigenvecs[[64]] e64 = eigenvecs[[65]]; e65 = eigenvecs[[66]]; e66 = eigenvecs[[67]]; e67 = eigenvecs[[68]]; e68 = eigenvecs[[69]]; e69 = eigenvecs[[70]]; e70 = eigenvecs[[71]] e71 = eigenvecs[[72]]; e72 = eigenvecs[[73]]; e73 = eigenvecs[[74]]; e74 = eigenvecs[[75]]; e75 = eigenvecs[[76]]; e76 = eigenvecs[[77]]; e77 = eigenvecs[[78]] e78 = eigenvecs[[79]]; e79 = eigenvecs[[80]]; e80 = eigenvecs[[81]]; e81 = eigenvecs[[82]]; e82 = eigenvecs[[83]]; e83 = eigenvecs[[84]]; e84 = eigenvecs[[85]] e85 = eigenvecs[[86]]; e86 = eigenvecs[[87]]; e87 = eigenvecs[[88]]; e88 = eigenvecs[[89]]; e89 = eigenvecs[[90]]; e90 = eigenvecs[[91]]; e91 = eigenvecs[[92]] e92 = eigenvecs[[93]]; e93 = eigenvecs[[94]]; e94 = eigenvecs[[95]]; e95 = eigenvecs[[96]]; e96 = eigenvecs[[97]]; e97 = eigenvecs[[98]]; e98 = eigenvecs[[99]] # this procedure could be simpler if the eigenvectors were added to a general dataframe instead of being defined one by one # The eigenvectors can easily be obtained - and the set of candidate eigenvectors be defined - through the following procedure # generates projection matrix # nW = dim(W_C)[[1]] # idW = diag(1, nW, nW) # vec1W = as.matrix(diag(idW)) # vec1vec1W = vec1W %*% t(vec1W) # vec1vec1nW = vec1vec1W / nW # idvec1vec1nW = idW - vec1vec1nW # wW_C = idvec1vec1nW %*% W_C %*% idvec1vec1nW # extracts the eigenvectors # eigen_resW_C = eigen(wW_C, only.values = FALSE) # summary(eigen_resW_C) # eigenvecsW_C = eigen_resW_C$vectors # write(t(eigenvecsW_C), ncolumns = nW, file = "eigenvecsW_C.txt") # compute Moran's I for eigenvectors # moran.scoresW = as.matrix(vector(mode = "numeric", length = nW)) # yesnoW = as.matrix(vector(mode = "numeric", length = nW)) # candW = matrix(0, nW, 1) # for (i in 1:nW) { # moranW = moran.test(eigenvecsW_C[,i], W.listc, zero.policy = FALSE, alternative = "two.sided") # moran.scoresW[[i]] = moranW$estimate[1] # if (moran.scoresW[[i]]/moran.scoresW[[1]] >= 0.25) yesnoW[[i]] = 1 else yesnoW[[i]] = 0 # if (moran.scoresW[[i]]/moran.scoresW[[1]] >= 0.25) candW = cbind(candW, eigenvecsW_C[,i]) # } # candW = candW[,2:dim(candW)[[2]]] # write(t(candW), ncolumns = dim(candW)[[2]], file = "cand_eigenvecsW_C.txt") # moran.scoresW[,1]/moran.scoresW[[1]] # stepwise nb to select spatial filter # only to be run the first time, not necessary once the spatial filter is known # defines larger model including 98 candidate eigenvectors depsf.nb = glm.nb(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh + scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy + e1 + e2 + e3 + e4 + e5 + e6 + e7 + e8 + e9 + e10 + e11 + e12 + e13 + e14 + e15 + e16 + e17 + e18 + e19 + e20 + e21 + e22 + e23 + e24 + e25 + e26 + e27 + e28 + e29 + e30 + e31 + e32 + e33 + e34 + e35 + e36 + e37 + e38 + e39 + e40 + e41 + e42 + e43 + e44 + e45 + e46 + e47 + e48 + e49 + e50 + e51 + e52 + e53 + e54 + e55 + e56 + e57 + e58 + e59 + e60 + e61 + e62 + e63 + e64 + e65 + e66 + e67 + e68 + e69 + e70 + e71 + e72 + e73 + e74 + e75 + e76 + e77 + e78 + e79 + e80 + e81 + e82 + e83 + e84 + e85 + e86 + e87 + e88 + e89 + e90 + e91 + e92 + e93 + e94 + e95 + e96 + e97 + e98, maxit = 50) # stewise to choose spatial filter stepdepsf.nb = step(depsf.nb, scope = list(lower = ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh + scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy)) # look for terms to drop since the step function tends to overfit # otherwise a stricter selection can be made by using BIC instead of AIC in the stepwise # stepdepsf.nb <- step(depsf.nb, scope = list(lower = ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lngdp00cub + lnpop00 + indsh + scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy), k = log(length(dep)), direction = "backward") dropterm(stepdepsf.nb, test = "Chisq") # in consecutive steps, the following eigenvectors are dropped: e61 e81 e59 e10 e24 e11 e23 e92 e79 e83 e16 e30 e20 stepdepsf.nb = update(stepdepsf.nb, . ~ . - e20) # this being the last drop command dropterm(stepdepsf.nb, test = "Chisq") summary(stepdepsf.nb) # robust standard errors coeftest(depSF.nb, vcov = sandwich) moran.test(residuals.glm(stepdepsf.nb), W.listc, zero.policy = FALSE, alternative = "two.sided") # write(residuals.glm(stepdepsf.nb), ncolumns = 1, file = "res_stepsfnb_cra.txt") # obtains a rough pseudo-R sqared fitted.stepnb = stepdepsf.nb$fitted check.stepnb = lm(dep ~ fitted.stepnb - 1) summary(check.stepnb) # McFadden's pseudo-R2 dep.nb0 <- update(dep.nb, formula = . ~ 1) 1 - as.vector(logLik(dep.nb) / logLik(dep.nb0)) # nb with all explanatory variables + SF # uses the spatial filter selected above depSF.nb = glm.nb(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh + scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy + e1 + e2 + e4 + e5 + e7 + e12 + e14 + e15 + e17 + e18 + e25 + e28 + e29 + e32 + e38 + e44 + e48 + e50 + e51 + e52 + e54 + e55 + e56 + e58 + e60 + e62 + e63 + e64 + e67 + e68 + e70 + e73 + e78 + e84 + e86 + e89 + e91 + e94, maxit = 100) summary(depSF.nb) coeftest(depSF.nb, vcov = sandwich) moran.test(residuals.glm(depSF.nb), W.listc, alternative = "two.sided") # computes the linear combination of eigenvectors to obtain a single variable: the spatial filter coeffdepSF.nb = as.matrix(depSF.nb$coefficients) SFcoeff = coeffdepSF.nb[16:dim(coeffdepSF.nb)[[1]],] SFeigenvecs = cbind(e1, e2, e4, e5, e7, e12, e14, e15, e17, e18, e25, e28, e29, e32, e38, e44, e48, e50, e51, e52, e54, e55, e56, e58, e60, e62, e63, e64, e67, e68, e70, e73, e78, e84, e86, e89, e91, e94) SF = SFeigenvecs %*% SFcoeff # write(SF, ncolumns = 1, file = "sfnewvars4int_(nb).txt") # nb with all explanatory variables + SF as one variable # just to obtain a standard error for the SF depSF.nb = glm.nb(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh + scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy + SF, maxit = 100) summary(depSF.nb) coeftest(depSF.nb, vcov = sandwich) moran.test(residuals.glm(depSF.nb), W.listc, alternative = "two.sided") ################ repeat with interaction term (Model 2) ################ depsf.nb <- glm.nb(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh * scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy + e1 + e2 + e3 + e4 + e5 + e6 + e7 + e8 + e9 + e10 + e11 + e12 + e13 + e14 + e15 + e16 + e17 + e18 + e19 + e20 + e21 + e22 + e23 + e24 + e25 + e26 + e27 + e28 + e29 + e30 + e31 + e32 + e33 + e34 + e35 + e36 + e37 + e38 + e39 + e40 + e41 + e42 + e43 + e44 + e45 + e46 + e47 + e48 + e49 + e50 + e51 + e52 + e53 + e54 + e55 + e56 + e57 + e58 + e59 + e60 + e61 + e62 + e63 + e64 + e65 + e66 + e67 + e68 + e69 + e70 + e71 + e72 + e73 + e74 + e75 + e76 + e77 + e78 + e79 + e80 + e81 + e82 + e83 + e84 + e85 + e86 + e87 + e88 + e89 + e90 + e91 + e92 + e93 + e94 + e95 + e96 + e97 + e98, maxit = 50) stepdepsf.nb <- step(depsf.nb, scope = list(lower = ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh * scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy)) moran.test(residuals.glm(stepdepsf.nb), W.listc, zero.policy = FALSE, alternative = "two.sided") dropterm(stepdepsf.nb, test = "Chisq") # drop e65 e59 e87 e3 e86 e76 e93 e20 e61 e57 e79 e16 e84 e33 e10 e11 e9 e24 e67 e68 e81 e48 stepdepsf.nb <- update(stepdepsf.nb, . ~ . - e48) summary(stepdepsf.nb) moran.test(residuals.glm(stepdepsf.nb), W.listc, zero.policy = FALSE, alternative = "two.sided") dropterm(stepdepsf.nb, test = "Chisq") fitted.stepnb <- stepdepsf.nb$fitted check.stepnb <- lm(dep ~ fitted.stepnb - 1) summary(check.stepnb) # nb with all explanatory variables + SF depSF.nb <- glm.nb(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh * scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy + e1 + e2 + e4 + e5 + e7 + e12 + e13 + e14 + e15 + e18 + e25 + e28 + e29 + e30 + e38 + e44 + e50 + e51 + e52 + e54 + e55 + e56 + e58 + e60 + e62 + e63 + e64 + e70 + e73 + e78 + e89 + e91 + e94, maxit = 1000) summary(depSF.nb) coeftest(depSF.nb, vcov = sandwich) moran.test(residuals.glm(depSF.nb), W.listc, alternative = "two.sided") coeffdepSF.nb <- as.matrix(depSF.nb$coefficients) SFcoeff <- coeffdepSF.nb[16:(dim(coeffdepSF.nb)[[1]] - 1),] SFeigenvecs <- cbind(e1, e2, e4, e5, e7, e12, e13, e14, e15, e18, e25, e28, e29, e30, e38, e44, e50, e51, e52, e54, e55, e56, e58, e60, e62, e63, e64, e70, e73, e78, e89, e91, e94) SF <- SFeigenvecs %*% SFcoeff # check how many zeros in negbin estimate (frequencies) rbind(obs = table(dep)[1:10], exp = round(sapply(0:9, function(x) sum(dpois(x, fitted(depSF.nb)))))) # nb with all explanatory variables + SF as one variable depSF.nb <- glm.nb(dep ~ manuf00 + corps00 + lngdp00 + lngdp00sq + lnpop00 + indsh*scish + mechsh + elecsh + chemsh + drugsh + central_dummy + urban_dummy + aggl_dummy + SF, maxit = 100) summary(depSF.nb) coeftest(depSF.nb, vcov = sandwich) moran.test(residuals.glm(depSF.nb), W.listc, alternative = "two.sided")