svdcov

The goal of svdcov is to calculate the covariance matrix of rectangular data using SVD or shrinkage methods.

Installation

You can install the released version of svdcov from github with:

devtools::intall_github("github.io/jtannen/svdcov")

Example

library(svdcov)

# Consider a block-diagonal example.
cov <- matrix(0, 10, 10)
cov[row(cov) <=5 & col(cov) <= 5] <- 0.5
cov[row(cov) > 5 & col(cov) > 5] <- 0.5
diag(cov) <- 1

data <- MASS::mvrnorm(
  n=100, 
  mu=1:10, 
  Sigma=cov
)

data <- t(data)

res <- get_svd(data, n_svd=2)

res@row_cov
get_fitted(res)

sample_from_posterior(
  res,
  obs=c(1.42, 7.34),
  obs_id=c(1, 6)
)

## Example with Philadelphia elections data
library(dplyr)
library(sf)
library(ggplot2)

data("phila_elections")
data("phila_divs")

phila_elections <- phila_elections %>%
  filter(
    !grepl("WRITE(\\s|-)IN", candidate, ignore.case=T),
    substr(warddiv, 1, 2) != "99", # overseas
    is.na(district) # city-wide races
  )

competitive_races <- phila_elections %>% 
  group_by(office, election_type, year, candidate) %>%
  summarise(votes=sum(votes), .groups="drop") %>%
  group_by(office, election_type, year) %>%
  summarise(is_competitive = sum(votes > 100) > 2) %>%
  filter(is_competitive)
  
# mat is a matrix with division rows, and candidate columns
mat <- phila_elections %>% 
  inner_join(competitive_races) %>%
  mutate(column_name = paste(candidate, office, party, election_type, year, sep="::")) %>%
  group_by(office, year, election_type, warddiv) %>%
  mutate(log_pvote = log((votes + 1) / sum(votes+1))) %>%
  ungroup() %>%
  pivot_wider(
    id_cols = "warddiv", 
    names_from="column_name", 
    values_from="log_pvote",
    values_fill=list(log_pvote=log(0.001))
  )
  
mat <- mat %>% tibble::column_to_rownames("warddiv")
mat <- as.matrix(mat)

row_ward <- substr(row.names(mat), 1, 2)
svd_par <- get_svd(
  mat, 
  n_svd=3,
  row_groups=row_ward,  ## cluster extra variance by ward
  is_logged=TRUE
)

ggplot(
  divs %>% left_join(svd_par@row_scores, by=c("warddiv"="row"))
) + 
  geom_sf(aes(fill=score.1), color=NA) +
  scale_fill_gradient2() +
  theme_minimal()
  
svd_par@col_scores %>% arrange(desc(score.1)) %>% head()

## Suppose we have a weak prior that a candidate will receive 25% of the vote,
## and then see they received 50% of the vote in "27-14".
## How will the do in "01-01"?

sample <- sample_from_posterior(
  svd_par,
  obs=c(log(0.5), log(0.5)),
  obs_id=c("01-02", "01-03"),
  col_mean_prior=log(0.25),
  col_mean_prior_var=1
)

# Note: the model doesn't know these are percents, so some are > 1.
sample %>% 
  filter(row=="01-01") %>%
  with(hist(exp(value)))

Name		Name	Last commit message	Last commit date
Latest commit History 9 Commits
R		R
data		data
man		man
tests		tests
.Rbuildignore		.Rbuildignore
.gitignore		.gitignore
DESCRIPTION		DESCRIPTION
LICENSE		LICENSE
LICENSE.md		LICENSE.md
NAMESPACE		NAMESPACE
README.md		README.md
svdcov.Rproj		svdcov.Rproj

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Licenses found

Uh oh!

Repository files navigation

svdcov

Installation

Example

About

Licenses found

Uh oh!

Releases

Packages

Languages

License

Licenses found

jtannen/svdcov

Folders and files

Latest commit

History

Repository files navigation

svdcov

Installation

Example

About

Resources

License

Licenses found

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages