If you are using the package for the first time, you must install it first
# install.packages("survival")
library(survival)
Load data sets from package
pbc <- survival::pbc
pbcseq <- survival::pbcseq
Obtain the mean of columns time
and age
in the pbc
data set
apply(pbc[, c(2,5)], 2, mean)
## time age
## 1917.78230 50.74155
apply(pbc[, c("time", "age")], 2, mean)
## time age
## 1917.78230 50.74155
Obtain the mean of rows in the pbc
data set
Before running the code, think about whether it is meaningful
# apply(pbc[, ], 1, mean)
Obtain the standardized values of columns time
, age
and bili
in the pbc
data set
# Column-wise z-scores: subtract each column's mean and divide by its
# standard deviation; head() shows only the first rows of the result.
head(
  apply(
    X = pbc[, c("time", "age", "bili")],
    MARGIN = 2,
    FUN = function(column) {
      centered <- column - mean(column)
      centered / sd(column)
    }
  )
)
## time age bili
## [1,] -1.373965243 0.7680208 2.55908571
## [2,] 2.337540359 0.5460516 -0.48118215
## [3,] -0.819955139 1.8503499 -0.41311645
## [4,] 0.006533792 0.3827850 -0.32236219
## [5,] -0.374574466 -1.2095228 0.04065487
## [6,] 0.529765557 1.4852931 -0.54924785
Other examples
Create a matrix
# Draw 100 distinct integers from 0..200 (sample() does not replace by default).
X <- sample(x = 0:200, size = 100)
# NOTE(review): a 50 x 50 matrix has 2500 cells, so the 100 values in X are
# recycled 25 times without a warning. Every odd column equals X[1:50] and
# every even column equals X[51:100], which is why the column means and sums
# alternate between two values. Confirm this is intended.
Mat <- matrix(data = X, nrow = 50, ncol = 50)
Obtain the mean value of each row for matrix Mat
apply(Mat, 1, mean)
## [1] 102.5 64.0 184.5 89.0 120.5 120.0 84.5 90.5 31.0 30.5 77.0 119.0 166.0 167.0 169.5 138.5
## [17] 57.0 64.0 65.5 131.5 94.5 88.5 59.0 78.0 145.0 56.0 69.0 106.5 96.5 179.5 42.5 50.0
## [33] 117.5 90.0 132.5 124.0 46.0 79.5 72.5 79.5 87.0 39.0 64.5 110.0 125.5 128.5 117.5 126.5
## [49] 131.5 58.0
Obtain the mean value of each column for matrix Mat
apply(Mat, 2, mean)
## [1] 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84
## [15] 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84
## [29] 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84
## [43] 102.82 91.84 102.82 91.84 102.82 91.84 102.82 91.84
Calculate the sum of each column for matrix Mat
apply(Mat, 2, sum)
## [1] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [21] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [41] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
apply(Mat, 2, function(x) { sum(x) } )
## [1] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [21] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [41] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
Calculate the sum of each row for matrix Mat
apply(Mat, 1, sum)
## [1] 5125 3200 9225 4450 6025 6000 4225 4525 1550 1525 3850 5950 8300 8350 8475 6925 2850 3200 3275 6575
## [21] 4725 4425 2950 3900 7250 2800 3450 5325 4825 8975 2125 2500 5875 4500 6625 6200 2300 3975 3625 3975
## [41] 4350 1950 3225 5500 6275 6425 5875 6325 6575 2900
apply(Mat, 1, function(x) { sum(x) } )
## [1] 5125 3200 9225 4450 6025 6000 4225 4525 1550 1525 3850 5950 8300 8350 8475 6925 2850 3200 3275 6575
## [21] 4725 4425 2950 3900 7250 2800 3450 5325 4825 8975 2125 2500 5875 4500 6625 6200 2300 3975 3625 3975
## [41] 4350 1950 3225 5500 6275 6425 5875 6325 6575 2900
There is more than one way of doing things in R!
colSums(Mat)
## [1] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [21] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [41] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
rowSums(Mat)
## [1] 5125 3200 9225 4450 6025 6000 4225 4525 1550 1525 3850 5950 8300 8350 8475 6925 2850 3200 3275 6575
## [21] 4725 4425 2950 3900 7250 2800 3450 5325 4825 8975 2125 2500 5875 4500 6625 6200 2300 3975 3625 3975
## [41] 4350 1950 3225 5500 6275 6425 5875 6325 6575 2900
Obtain the summary of the pbc
data set
lapply(pbc, summary)
## $id
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 105.2 209.5 209.5 313.8 418.0
##
## $time
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 41 1093 1730 1918 2614 4795
##
## $status
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.8301 2.0000 2.0000
##
## $trt
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 1.000 1.000 1.494 2.000 2.000 106
##
## $age
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 26.28 42.83 51.00 50.74 58.24 78.44
##
## $sex
## m f
## 44 374
##
## $ascites
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00000 0.00000 0.00000 0.07692 0.00000 1.00000 106
##
## $hepato
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 1.0000 0.5128 1.0000 1.0000 106
##
## $spiders
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.2885 1.0000 1.0000 106
##
## $edema
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1005 0.0000 1.0000
##
## $bili
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.300 0.800 1.400 3.221 3.400 28.000
##
## $chol
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 120.0 249.5 309.5 369.5 400.0 1775.0 134
##
## $albumin
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.960 3.243 3.530 3.497 3.770 4.640
##
## $copper
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 4.00 41.25 73.00 97.65 123.00 588.00 108
##
## $alk.phos
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 289.0 871.5 1259.0 1982.7 1980.0 13862.4 106
##
## $ast
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 26.35 80.60 114.70 122.56 151.90 457.25 106
##
## $trig
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 33.00 84.25 108.00 124.70 151.00 598.00 136
##
## $platelet
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 62.0 188.5 251.0 257.0 318.0 721.0 11
##
## $protime
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 9.00 10.00 10.60 10.73 11.10 18.00 2
##
## $stage
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 2.000 3.000 3.024 4.000 4.000 6
Obtain the number of missing values per pbc
variable
# Count the NA entries in every column of pbc; lapply() returns a list with
# one count per column.
lapply(
  X = pbc,
  FUN = function(column) {
    sum(is.na(column))
  }
)
## $id
## [1] 0
##
## $time
## [1] 0
##
## $status
## [1] 0
##
## $trt
## [1] 106
##
## $age
## [1] 0
##
## $sex
## [1] 0
##
## $ascites
## [1] 106
##
## $hepato
## [1] 106
##
## $spiders
## [1] 106
##
## $edema
## [1] 0
##
## $bili
## [1] 0
##
## $chol
## [1] 134
##
## $albumin
## [1] 0
##
## $copper
## [1] 108
##
## $alk.phos
## [1] 106
##
## $ast
## [1] 106
##
## $trig
## [1] 136
##
## $platelet
## [1] 11
##
## $protime
## [1] 2
##
## $stage
## [1] 6
Other examples
Obtain the quadratic term of the vector 1:3
Present the results as a list
# Square each of 1, 2, 3; lapply() keeps the results as a list.
lapply(X = 1:3, FUN = function(value) value^2)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
Create a list that consists of Mat
and Mat^2
Obtain the mean of each element
Present the results as a list
X <- list(Mat, Mat^2)
lapply(X, mean)
## [[1]]
## [1] 97.33
##
## [[2]]
## [1] 12691.67
Create a list
# Three small matrices, each filled column by column.
A <- matrix(data = 1:9, nrow = 3, ncol = 3)
B <- matrix(data = 4:15, nrow = 4, ncol = 3)
# Only three values are supplied for six cells, so 8:10 is recycled and both
# columns of C are identical.
C <- matrix(data = 8:10, nrow = 3, ncol = 2)
Select elements in a list
MyList <- list(A, B, C)
Select the first row of each element
Present the results as a list
# Extract the first row of every matrix in MyList; each result is a plain
# vector because single-row indexing drops the matrix dimensions.
lapply(MyList, function(m) m[1, ])
## [[1]]
## [1] 1 4 7
##
## [[2]]
## [1] 4 8 12
##
## [[3]]
## [1] 8 8
Select the second column of each element
Present the results as a list
# Extract the second column of every matrix in MyList as a plain vector.
lapply(MyList, function(m) m[, 2])
## [[1]]
## [1] 4 5 6
##
## [[2]]
## [1] 8 9 10 11
##
## [[3]]
## [1] 8 9 10
Obtain the number of missing values per pbc
variable
sapply(pbc, function(x) { sum(is.na(x)) } )
## id time status trt age sex ascites hepato spiders edema bili
## 0 0 0 106 0 0 106 106 106 0 0
## chol albumin copper alk.phos ast trig platelet protime stage
## 134 0 108 106 106 136 11 2 6
Other examples
Obtain the quadratic term of the vector 1:3
Present the results as a vector
sapply(1:3, function(x) { x^2 } )
## [1] 1 4 9
Create a list that consists of Mat
and Mat^2
Obtain the mean of each element
Present the results as a vector
X <- list(Mat, Mat^2)
sapply(X, mean)
## [1] 97.33 12691.67
Select the element in the second row and first column of each element
Present the results as a vector
# Pick the single entry in row 2, column 1 of every matrix in MyList;
# sapply() collapses the one-value results into a vector.
sapply(MyList, function(m) m[2, 1])
## [1] 2 5 9
Obtain the mean age
and time
per sex
tapply(pbc$age, pbc$sex, mean)
## m f
## 55.71072 50.15694
tapply(pbc$time, pbc$sex, mean)
## m f
## 1894.023 1920.578
Obtain the mean age
and time
(with each value of both variables divided by two) per sex
# Mean of the halved ages within each sex group.
tapply(
  X = pbc$age,
  INDEX = pbc$sex,
  FUN = function(values) mean(values / 2)
)
## m f
## 27.85536 25.07847
# Mean of the halved follow-up times within each sex group.
tapply(
  X = pbc$time,
  INDEX = pbc$sex,
  FUN = function(values) mean(values / 2)
)
## m f
## 947.0114 960.2888
Obtain the mean age
and time
per sex
and status
tapply(pbc$age, list(pbc$status, pbc$sex), mean)
## m f
## 0 55.68595 49.02203
## 1 42.16290 41.62939
## 2 57.42174 53.30739
tapply(pbc$time, list(pbc$status, pbc$sex), mean)
## m f
## 0 2462.176 2322.953
## 1 1147.667 1600.545
## 2 1584.875 1340.504
Create a list:
# Pairwise rep(): repeat 1 four times, 2 three times, 3 twice and 4 once.
# The results differ in length, so mapply() returns a list.
mapply(FUN = rep, x = 1:4, times = 4:1)
## [[1]]
## [1] 1 1 1 1
##
## [[2]]
## [1] 2 2 2
##
## [[3]]
## [1] 3 3
##
## [[4]]
## [1] 4
#### alternative run: list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1))
Create a list:
mapply(rep, times = 1:4, x = 4)
## [[1]]
## [1] 4
##
## [[2]]
## [1] 4 4
##
## [[3]]
## [1] 4 4 4
##
## [[4]]
## [1] 4 4 4 4
#### alternative run: list(rep(4, times = 1), rep(4, times = 2), rep(4, times = 3), rep(4, times = 4))
Create a list:
# Repeat each of 1..4 four times; SIMPLIFY = FALSE keeps the equal-length
# results as a list instead of binding them into a matrix.
mapply(FUN = rep, x = 1:4, times = 4, SIMPLIFY = FALSE)
## [[1]]
## [1] 1 1 1 1
##
## [[2]]
## [1] 2 2 2 2
##
## [[3]]
## [1] 3 3 3 3
##
## [[4]]
## [1] 4 4 4 4
### alternative run: list(rep(1, 4), rep(2, 4), rep(3, 4), rep(4, 4))
Note: if the length is the same we can obtain a simplified output
# Same call with SIMPLIFY = TRUE: the four length-4 results become the
# columns of a 4 x 4 matrix.
mapply(FUN = rep, x = 1:4, times = 4, SIMPLIFY = TRUE)
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 1 2 3 4
## [3,] 1 2 3 4
## [4,] 1 2 3 4
### alternative run: matrix(c(rep(1, 4), rep(2, 4), rep(3, 4), rep(4, 4)), 4, 4)
Other examples
# For each pair (n, offset): build the sequence 1..n and shift it by offset.
# The result names (a, b, c) are taken from the first vector argument.
mapply(
  FUN = function(n, offset) seq_len(n) + offset,
  c(a = 1, b = 2, c = 3),
  c(A = 10, B = 0, C = -10)
)
## $a
## [1] 11
##
## $b
## [1] 1 2
##
## $c
## [1] -9 -8 -7
#### alternative run: list(c(1) + 10, c(1, 2) + 0, c(1, 2, 3) - 10)
X <- list(Mat, Mat^2)
mapply(mean, X)
## [1] 97.33 12691.67
Note!
mapply(mean, MyList)
## [1] 5.0 9.5 9.0
sapply(MyList, mean)
## [1] 5.0 9.5 9.0
# Raise each x to the power y; the single y is recycled across both x values
# and the length-one results simplify to a numeric vector.
mapply(
  FUN = function(x, y) x^y,
  x = c(2, 3),
  y = 4
)
## [1] 16 81
#### alternative run: c(2^4, 3^4)
Let’s assume that only the long format data set pbcseq
is available
We want to obtain the mean serum bilirubin
of the last follow-up measurement (specified as day
) per status
group
Each patient is counted once!
head(pbcseq)
## id futime status trt age sex day ascites hepato spiders edema bili chol albumin alk.phos ast
## 1 1 400 2 1 58.76523 f 0 1 1 1 1 14.5 261 2.60 1718 138.0
## 2 1 400 2 1 58.76523 f 192 1 1 1 1 21.3 NA 2.94 1612 6.2
## 3 2 5169 0 1 56.44627 f 0 0 1 1 0 1.1 302 4.14 7395 113.5
## 4 2 5169 0 1 56.44627 f 182 0 1 1 0 0.8 NA 3.60 2107 139.5
## 5 2 5169 0 1 56.44627 f 365 0 1 1 0 1.0 NA 3.55 1711 144.2
## 6 2 5169 0 1 56.44627 f 768 0 1 1 0 1.9 NA 3.92 1365 144.2
## platelet protime stage
## 1 190 12.2 4
## 2 183 11.2 4
## 3 221 10.6 3
## 4 188 11.0 3
## 5 161 11.6 3
## 6 122 10.6 3
Sort data
# Reorder the rows by patient id and, within each patient, by visit day.
pbcseq <- pbcseq[with(pbcseq, order(id, day)), ]
Select the last follow-up measurement of each patient
# For every patient id, tail(..., n = 1) returns the row name of that
# patient's final row; those row names then index pbcseq. Assumes pbcseq is
# already ordered by id and day, so the final row is the latest measurement.
pbcseq.idNEW2 <- pbcseq[
  tapply(X = rownames(pbcseq), INDEX = pbcseq$id, FUN = tail, n = 1),
]
Step by step
tapply(rownames(pbcseq), pbcseq$id, tail, 1)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## "2" "11" "15" "22" "28" "34" "41" "49" "56" "57" "69" "71" "83" "90" "101"
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## "114" "117" "118" "133" "137" "149" "152" "154" "167" "179" "185" "186" "189" "199" "202"
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## "214" "230" "240" "254" "258" "269" "271" "281" "289" "304" "308" "324" "339" "350" "359"
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## "367" "374" "383" "387" "396" "406" "415" "418" "423" "429" "435" "446" "462" "469" "482"
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## "496" "506" "509" "515" "521" "534" "544" "558" "562" "575" "586" "592" "607" "611" "616"
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## "617" "619" "623" "634" "638" "648" "658" "673" "676" "687" "688" "690" "694" "698" "707"
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
## "710" "711" "726" "729" "730" "744" "748" "762" "775" "779" "791" "803" "804" "814" "824"
## 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## "829" "840" "848" "858" "865" "873" "884" "889" "900" "912" "923" "928" "935" "937" "945"
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
## "946" "956" "958" "959" "968" "973" "984" "989" "1001" "1007" "1011" "1017" "1026" "1038" "1048"
## 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
## "1057" "1069" "1075" "1081" "1093" "1096" "1105" "1108" "1112" "1116" "1123" "1128" "1134" "1137" "1146"
## 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
## "1157" "1160" "1170" "1171" "1174" "1178" "1189" "1198" "1202" "1210" "1221" "1222" "1225" "1226" "1231"
## 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## "1239" "1245" "1255" "1260" "1261" "1271" "1278" "1287" "1297" "1307" "1313" "1314" "1320" "1323" "1333"
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
## "1334" "1339" "1347" "1352" "1357" "1361" "1365" "1373" "1378" "1382" "1383" "1393" "1397" "1402" "1403"
## 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
## "1411" "1420" "1428" "1431" "1442" "1452" "1454" "1463" "1471" "1476" "1484" "1491" "1494" "1496" "1506"
## 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
## "1514" "1520" "1522" "1526" "1530" "1537" "1541" "1551" "1557" "1562" "1570" "1574" "1575" "1583" "1592"
## 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## "1601" "1605" "1609" "1612" "1616" "1620" "1624" "1625" "1627" "1629" "1631" "1633" "1636" "1639" "1647"
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
## "1651" "1659" "1661" "1665" "1667" "1673" "1677" "1685" "1688" "1690" "1691" "1694" "1702" "1706" "1712"
## 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## "1718" "1721" "1729" "1734" "1735" "1741" "1744" "1749" "1753" "1757" "1764" "1765" "1767" "1774" "1777"
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
## "1785" "1791" "1794" "1799" "1803" "1805" "1810" "1815" "1822" "1829" "1830" "1834" "1838" "1845" "1846"
## 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## "1848" "1852" "1854" "1858" "1865" "1870" "1873" "1875" "1882" "1884" "1887" "1890" "1893" "1894" "1896"
## 301 302 303 304 305 306 307 308 309 310 311 312
## "1899" "1904" "1908" "1909" "1914" "1916" "1921" "1926" "1931" "1936" "1940" "1945"
### alternative run: pbcseq.idNEW2 <- pbcseq[!duplicated(pbcseq[c("id")], fromLast = TRUE), ]
Obtain the mean serum bilirubin
per status
group
tapply(pbcseq.idNEW2$bili, pbcseq.idNEW2$status, mean)
## 0 1 2
## 2.190909 9.172414 11.800000
Let’s again assume that only the long format data set pbcseq
is available
We want to obtain the mean serum bilirubin
of the last stage of edema
(for multiple cases select last follow-up measurement) per status
group
Each patient and edema
stage is counted once!
head(pbcseq)
## id futime status trt age sex day ascites hepato spiders edema bili chol albumin alk.phos ast
## 1 1 400 2 1 58.76523 f 0 1 1 1 1 14.5 261 2.60 1718 138.0
## 2 1 400 2 1 58.76523 f 192 1 1 1 1 21.3 NA 2.94 1612 6.2
## 3 2 5169 0 1 56.44627 f 0 0 1 1 0 1.1 302 4.14 7395 113.5
## 4 2 5169 0 1 56.44627 f 182 0 1 1 0 0.8 NA 3.60 2107 139.5
## 5 2 5169 0 1 56.44627 f 365 0 1 1 0 1.0 NA 3.55 1711 144.2
## 6 2 5169 0 1 56.44627 f 768 0 1 1 0 1.9 NA 3.92 1365 144.2
## platelet protime stage
## 1 190 12.2 4
## 2 183 11.2 4
## 3 221 10.6 3
## 4 188 11.0 3
## 5 161 11.6 3
## 6 122 10.6 3
Sort data
# Reorder the rows by patient id, then by edema value, then by visit day.
pbcseq <- pbcseq[with(pbcseq, order(id, edema, day)), ]
Select the last stage of edema
of each patient
# For every patient id, tail(..., n = 1) returns the row name of that
# patient's final row; those row names then index pbcseq. Assumes pbcseq is
# already ordered by id, edema and day, so the final row per patient is the
# latest measurement at that patient's highest edema value.
pbcseq.idNEW3 <- pbcseq[
  tapply(X = rownames(pbcseq), INDEX = pbcseq$id, FUN = tail, n = 1),
]
Step by step
tapply(rownames(pbcseq), pbcseq$id, tail, 1)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## "2" "11" "15" "22" "27" "34" "41" "49" "56" "57" "69" "71" "83" "87" "101"
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## "114" "117" "118" "119" "137" "149" "152" "154" "167" "179" "185" "186" "189" "199" "202"
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## "214" "230" "240" "251" "257" "268" "271" "281" "288" "304" "308" "324" "339" "348" "359"
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## "367" "374" "383" "387" "396" "406" "415" "418" "423" "429" "435" "446" "462" "469" "481"
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## "496" "506" "507" "515" "521" "534" "544" "558" "562" "575" "584" "592" "607" "611" "612"
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## "617" "619" "623" "634" "638" "648" "658" "673" "676" "687" "688" "690" "694" "698" "707"
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
## "709" "711" "725" "729" "730" "744" "746" "762" "766" "779" "791" "803" "804" "814" "821"
## 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## "827" "840" "848" "858" "865" "873" "881" "889" "900" "912" "914" "928" "935" "937" "945"
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
## "946" "956" "958" "959" "968" "973" "981" "989" "1001" "1005" "1011" "1017" "1026" "1038" "1048"
## 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
## "1057" "1069" "1075" "1081" "1093" "1096" "1105" "1108" "1112" "1116" "1123" "1124" "1134" "1135" "1146"
## 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
## "1157" "1160" "1170" "1171" "1174" "1177" "1182" "1198" "1202" "1210" "1221" "1222" "1225" "1226" "1231"
## 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## "1239" "1245" "1255" "1260" "1261" "1271" "1278" "1287" "1297" "1307" "1313" "1314" "1320" "1323" "1330"
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
## "1334" "1337" "1347" "1349" "1357" "1361" "1365" "1373" "1378" "1382" "1383" "1392" "1397" "1402" "1403"
## 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
## "1411" "1420" "1428" "1431" "1442" "1449" "1454" "1463" "1471" "1476" "1484" "1491" "1494" "1495" "1506"
## 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
## "1514" "1515" "1522" "1526" "1530" "1537" "1541" "1551" "1557" "1562" "1570" "1574" "1575" "1583" "1592"
## 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## "1601" "1605" "1609" "1612" "1616" "1620" "1624" "1625" "1626" "1629" "1631" "1633" "1636" "1639" "1647"
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
## "1651" "1659" "1661" "1665" "1667" "1672" "1677" "1685" "1688" "1690" "1691" "1694" "1701" "1706" "1712"
## 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## "1718" "1721" "1729" "1733" "1735" "1740" "1744" "1749" "1753" "1757" "1763" "1765" "1767" "1774" "1777"
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
## "1782" "1791" "1794" "1799" "1803" "1805" "1806" "1815" "1822" "1829" "1830" "1834" "1838" "1845" "1846"
## 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## "1848" "1852" "1853" "1858" "1865" "1870" "1873" "1874" "1882" "1884" "1887" "1890" "1893" "1894" "1896"
## 301 302 303 304 305 306 307 308 309 310 311 312
## "1899" "1904" "1908" "1909" "1914" "1916" "1921" "1926" "1931" "1936" "1940" "1945"
### alternative run: pbcseq.idNEW3 <- pbcseq[!duplicated(pbcseq[c("id")], fromLast = TRUE), ]
Obtain the mean serum bilirubin
per status
group
tapply(pbcseq.idNEW3$bili, pbcseq.idNEW3$status, mean)
## 0 1 2
## 2.182517 8.651724 11.425000