# create some data:
(x <- rbinom(50, size = 1, prob = 0.3))
## [1] 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1
For a proportion test, the data can be provided as the number of successes (`x`) and the number of trials (`n`):
prop.test(x = sum(x), n = length(x), p = 0.4)
##
## 1-sample proportions test with continuity correction
##
## data: sum(x) out of length(x), null probability 0.4
## X-squared = 7.5208, df = 1, p-value = 0.006099
## alternative hypothesis: true p is not equal to 0.4
## 95 percent confidence interval:
## 0.1050216 0.3414368
## sample estimates:
## p
## 0.2
or as a matrix containing the number of successes and failures:
M <- matrix(data = c(sum(x), sum(1 - x)),
nrow = 1, ncol = 2,
dimnames = list(c(), c('success', 'failure'))
)
M
## success failure
## [1,] 10 40
prop.test(M, p = 0.4)
##
## 1-sample proportions test with continuity correction
##
## data: M, null probability 0.4
## X-squared = 7.5208, df = 1, p-value = 0.006099
## alternative hypothesis: true p is not equal to 0.4
## 95 percent confidence interval:
## 0.1050216 0.3414368
## sample estimates:
## p
## 0.2
If the argument `p` is unspecified, `p = 0.5` is used. As with `t.test()`, we can choose a one- or two-sided alternative hypothesis using the argument `alternative`, and the confidence level using `conf.level`. When `correct = TRUE` (the default), Yates' continuity correction is used.
The function `binom.test()` is used in the same way as `prop.test()`, but performs an exact binomial test.
binom.test(x = sum(x), n = length(x), p = 0.4)
##
## Exact binomial test
##
## data: sum(x) and length(x)
## number of successes = 10, number of trials = 50, p-value = 0.003571
## alternative hypothesis: true probability of success is not equal to 0.4
## 95 percent confidence interval:
## 0.1003022 0.3371831
## sample estimates:
## probability of success
## 0.2
Categorical data in multiple categories are usually displayed in a table:
X <- matrix(data = sample(5:50, size = 6),
nrow = 2, ncol = 3,
dimnames = list(c('exposed', 'non-exposed'),
c('none', 'mild', 'severe'))
)
X
## none mild severe
## exposed 40 36 6
## non-exposed 12 46 19
The function `chisq.test()` performs Pearson’s Chi-squared test. For this test (or for Fisher’s Exact test) it does not matter which variable goes into the rows and which into the columns:
chisq.test(X)
##
## Pearson's Chi-squared test
##
## data: X
## X-squared = 22.922, df = 2, p-value = 1.053e-05
chisq.test(t(X))
##
## Pearson's Chi-squared test
##
## data: t(X)
## X-squared = 22.922, df = 2, p-value = 1.053e-05
The p-value can also be computed by Monte Carlo simulation by setting `simulate.p.value = TRUE`. Then, the argument `B` specifies the number of simulations used to calculate the p-value.
chisq.test(X, simulate.p.value = TRUE, B = 1e5)
##
## Pearson's Chi-squared test with simulated p-value (based on 1e+05 replicates)
##
## data: X
## X-squared = 22.922, df = NA, p-value = 1e-05
Note that simulation can result in different p-values every time, especially when `B` is small:
set.seed(1234)
chisq.test(X, simulate.p.value = TRUE, B = 200)
##
## Pearson's Chi-squared test with simulated p-value (based on 200 replicates)
##
## data: X
## X-squared = 22.922, df = NA, p-value = 0.004975
chisq.test(X, simulate.p.value = TRUE, B = 200)
##
## Pearson's Chi-squared test with simulated p-value (based on 200 replicates)
##
## data: X
## X-squared = 22.922, df = NA, p-value = 0.004975
Specification is also possible via two factors:
x <- factor(sample(c('a', 'b'), size = 100, replace = TRUE))
y <- factor(sample(c('yes', 'no'), size = 100, replace = TRUE,
prob = c(0.3, 0.7)))
table(x, y)
## y
## x no yes
## a 32 20
## b 38 10
chisq.test(x, y, correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: x and y
## X-squared = 3.6935, df = 1, p-value = 0.05462
chisq.test(table(x, y), correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: table(x, y)
## X-squared = 3.6935, df = 1, p-value = 0.05462
`fisher.test()` takes similar arguments to `chisq.test()` and can be used when there are combinations with no observations:
X[2, 3] <- 0
X
## none mild severe
## exposed 40 36 6
## non-exposed 12 46 0
fisher.test(X)
##
## Fisher's Exact Test for Count Data
##
## data: X
## p-value = 3.631e-05
## alternative hypothesis: two.sided
`simulate.p.value` and `B` work as they do for `chisq.test()`. The arguments `conf.int`, `conf.level` and `alternative` are only available for 2x2 tables:
fisher.test(X[, -3], conf.int = TRUE, alternative = 'less')
##
## Fisher's Exact Test for Count Data
##
## data: X[, -3]
## p-value = 1
## alternative hypothesis: true odds ratio is less than 1
## 95 percent confidence interval:
## 0.000000 8.880574
## sample estimates:
## odds ratio
## 4.211748
M <- matrix(data = sample(1:50, size = 4),
nrow = 2, ncol = 2,
dimnames = list(before = c('no', 'yes'), after = c('no', 'yes')))
M
## after
## before no yes
## no 48 22
## yes 11 2
mcnemar.test(M)
##
## McNemar's Chi-squared test with continuity correction
##
## data: M
## McNemar's chi-squared = 3.0303, df = 1, p-value = 0.08172
`mcnemar.test()` also has the option to switch off the continuity correction by setting `correct = FALSE`.
Specification for the test is also possible via two factors:
x <- factor(sample(c('yes', 'no'), size = 100, replace = TRUE))
y <- factor(sample(c('yes', 'no'), size = 100, replace = TRUE,
prob = c(0.3, 0.7)))
Note that in this example the data are independent, but in a real case we would have paired observations.
mcnemar.test(x, y)
##
## McNemar's Chi-squared test with continuity correction
##
## data: x and y
## McNemar's chi-squared = 16.569, df = 1, p-value = 4.691e-05
table(x, y)
## y
## x no yes
## no 31 13
## yes 45 11
mcnemar.test(table(x,y))
##
## McNemar's Chi-squared test with continuity correction
##
## data: table(x, y)
## McNemar's chi-squared = 16.569, df = 1, p-value = 4.691e-05
x <- factor(sample(c('exposed', 'not exposed'), size = 100, replace = TRUE))
y <- factor(sample(c('yes', 'no'), size = 100, replace = TRUE))
stratum <- factor(sample(c('A', 'B', 'C'), size = 100, replace = TRUE))
table(x, y, stratum)
## , , stratum = A
##
## y
## x no yes
## exposed 11 3
## not exposed 11 12
##
## , , stratum = B
##
## y
## x no yes
## exposed 11 4
## not exposed 8 7
##
## , , stratum = C
##
## y
## x no yes
## exposed 8 6
## not exposed 11 8
mantelhaen.test(x = x, y = y, z = stratum)
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: x and y and stratum
## Mantel-Haenszel X-squared = 2.1824, df = 1, p-value = 0.1396
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.8880494 4.6841005
## sample estimates:
## common odds ratio
## 2.039537
mantelhaen.test(table(x, y, stratum))
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: table(x, y, stratum)
## Mantel-Haenszel X-squared = 2.1824, df = 1, p-value = 0.1396
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.8880494 4.6841005
## sample estimates:
## common odds ratio
## 2.039537
The arguments `alternative`, `correct`, `exact` and `conf.level` can be used as in the previous tests, but only in the case of a \(2 \times 2 \times K\) table.