Load packages

If you are using the package for the first time, you will have to first install it

# install.packages("survival") 
library(survival)

Get data

Load data sets from package

pbc <- survival::pbc
pbcseq <- survival::pbcseq

Wide format data

apply

Obtain the mean of columns time and age in the pbc data set

apply(pbc[, c(2,5)], 2, mean)
##       time        age 
## 1917.78230   50.74155
apply(pbc[, c("time", "age")], 2, mean)
##       time        age 
## 1917.78230   50.74155

Obtain the mean of rows in the pbc data set
Before running the code, think if it is meaningful

# apply(pbc[, ], 1, mean)

Obtain the standardized values of columns time, age and bili in the pbc data set

# For each selected column, subtract the column mean and divide by the column
# standard deviation (z-scores); head() prints only the first six rows
head(apply(pbc[, c("time", "age", "bili")], 2, function(x) { (x-mean(x))/sd(x) } ))
##              time        age        bili
## [1,] -1.373965243  0.7680208  2.55908571
## [2,]  2.337540359  0.5460516 -0.48118215
## [3,] -0.819955139  1.8503499 -0.41311645
## [4,]  0.006533792  0.3827850 -0.32236219
## [5,] -0.374574466 -1.2095228  0.04065487
## [6,]  0.529765557  1.4852931 -0.54924785

Other examples
Create a matrix

# Draw 100 distinct integers from 0:200 (no seed is set, so the values differ between runs)
X <-  sample(0:200, 100)
# NOTE: only 100 values are supplied for 50 x 50 = 2500 cells, so X is recycled
# column by column: all odd-numbered columns are identical and all even-numbered
# columns are identical (visible in the alternating column means and sums below)
Mat <- matrix(X, 50, 50) 

Obtain the mean value of each row for matrix Mat

apply(Mat, 1, mean)
##  [1] 102.5  64.0 184.5  89.0 120.5 120.0  84.5  90.5  31.0  30.5  77.0 119.0 166.0 167.0 169.5 138.5
## [17]  57.0  64.0  65.5 131.5  94.5  88.5  59.0  78.0 145.0  56.0  69.0 106.5  96.5 179.5  42.5  50.0
## [33] 117.5  90.0 132.5 124.0  46.0  79.5  72.5  79.5  87.0  39.0  64.5 110.0 125.5 128.5 117.5 126.5
## [49] 131.5  58.0

Obtain the mean value of each column for matrix Mat

apply(Mat, 2, mean)
##  [1] 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84
## [15] 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84
## [29] 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84
## [43] 102.82  91.84 102.82  91.84 102.82  91.84 102.82  91.84

Calculate the sum of each column for matrix Mat

apply(Mat, 2, sum)
##  [1] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [21] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [41] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
apply(Mat, 2, function(x) { sum(x) } )
##  [1] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [21] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [41] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592

Calculate the sum of each row for matrix Mat

apply(Mat, 1, sum)
##  [1] 5125 3200 9225 4450 6025 6000 4225 4525 1550 1525 3850 5950 8300 8350 8475 6925 2850 3200 3275 6575
## [21] 4725 4425 2950 3900 7250 2800 3450 5325 4825 8975 2125 2500 5875 4500 6625 6200 2300 3975 3625 3975
## [41] 4350 1950 3225 5500 6275 6425 5875 6325 6575 2900
apply(Mat, 1, function(x) { sum(x) } )
##  [1] 5125 3200 9225 4450 6025 6000 4225 4525 1550 1525 3850 5950 8300 8350 8475 6925 2850 3200 3275 6575
## [21] 4725 4425 2950 3900 7250 2800 3450 5325 4825 8975 2125 2500 5875 4500 6625 6200 2300 3975 3625 3975
## [41] 4350 1950 3225 5500 6275 6425 5875 6325 6575 2900

There is no one way of doing things in R!

colSums(Mat)
##  [1] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [21] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
## [41] 5141 4592 5141 4592 5141 4592 5141 4592 5141 4592
rowSums(Mat)
##  [1] 5125 3200 9225 4450 6025 6000 4225 4525 1550 1525 3850 5950 8300 8350 8475 6925 2850 3200 3275 6575
## [21] 4725 4425 2950 3900 7250 2800 3450 5325 4825 8975 2125 2500 5875 4500 6625 6200 2300 3975 3625 3975
## [41] 4350 1950 3225 5500 6275 6425 5875 6325 6575 2900

lapply

Obtain the summary of the pbc data set

lapply(pbc, summary)
## $id
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   105.2   209.5   209.5   313.8   418.0 
## 
## $time
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      41    1093    1730    1918    2614    4795 
## 
## $status
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.8301  2.0000  2.0000 
## 
## $trt
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   1.000   1.000   1.494   2.000   2.000     106 
## 
## $age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   26.28   42.83   51.00   50.74   58.24   78.44 
## 
## $sex
##   m   f 
##  44 374 
## 
## $ascites
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00000 0.00000 0.00000 0.07692 0.00000 1.00000     106 
## 
## $hepato
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0000  1.0000  0.5128  1.0000  1.0000     106 
## 
## $spiders
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0000  0.0000  0.2885  1.0000  1.0000     106 
## 
## $edema
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1005  0.0000  1.0000 
## 
## $bili
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.300   0.800   1.400   3.221   3.400  28.000 
## 
## $chol
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   120.0   249.5   309.5   369.5   400.0  1775.0     134 
## 
## $albumin
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.960   3.243   3.530   3.497   3.770   4.640 
## 
## $copper
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    4.00   41.25   73.00   97.65  123.00  588.00     108 
## 
## $alk.phos
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   289.0   871.5  1259.0  1982.7  1980.0 13862.4     106 
## 
## $ast
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   26.35   80.60  114.70  122.56  151.90  457.25     106 
## 
## $trig
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   33.00   84.25  108.00  124.70  151.00  598.00     136 
## 
## $platelet
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    62.0   188.5   251.0   257.0   318.0   721.0      11 
## 
## $protime
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    9.00   10.00   10.60   10.73   11.10   18.00       2 
## 
## $stage
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   2.000   3.000   3.024   4.000   4.000       6

Obtain the number of missing values per pbc variable

lapply(pbc, function(x) { sum(is.na(x)) } ) 
## $id
## [1] 0
## 
## $time
## [1] 0
## 
## $status
## [1] 0
## 
## $trt
## [1] 106
## 
## $age
## [1] 0
## 
## $sex
## [1] 0
## 
## $ascites
## [1] 106
## 
## $hepato
## [1] 106
## 
## $spiders
## [1] 106
## 
## $edema
## [1] 0
## 
## $bili
## [1] 0
## 
## $chol
## [1] 134
## 
## $albumin
## [1] 0
## 
## $copper
## [1] 108
## 
## $alk.phos
## [1] 106
## 
## $ast
## [1] 106
## 
## $trig
## [1] 136
## 
## $platelet
## [1] 11
## 
## $protime
## [1] 2
## 
## $stage
## [1] 6

Other examples
Obtain the quadratic term of the vector 1:3
Present the results as a list

lapply(1:3, function(x) { x^2 } )
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 9

Create a list that consists of Mat and Mat^2
Obtain the mean of each element
Present the results as a list

X <- list(Mat, Mat^2)
lapply(X, mean)
## [[1]]
## [1] 97.33
## 
## [[2]]
## [1] 12691.67

Create a list

# 3 x 3 matrix filled column by column with 1..9
A <- matrix(1:9, 3,3)
# 4 x 3 matrix filled column by column with 4..15
B <- matrix(4:15, 4,3)
# 3 x 2 matrix: the three values 8:10 are recycled, so both columns are 8, 9, 10
C <- matrix(8:10, 3,2)

Select elements in a list

MyList <- list(A, B, C) 

Select the first row of each element
Present the results as a list

# "[" is the subsetting function; each matrix x is subset as x[1, ]
# (the empty trailing argument leaves the column index missing, i.e. all columns)
lapply(MyList,"[", 1, )
## [[1]]
## [1] 1 4 7
## 
## [[2]]
## [1]  4  8 12
## 
## [[3]]
## [1] 8 8

Select the second column of each element
Present the results as a list

lapply(MyList,"[", , 2)
## [[1]]
## [1] 4 5 6
## 
## [[2]]
## [1]  8  9 10 11
## 
## [[3]]
## [1]  8  9 10

sapply

Obtain the number of missing values per pbc variable

sapply(pbc, function(x) { sum(is.na(x)) } ) 
##       id     time   status      trt      age      sex  ascites   hepato  spiders    edema     bili 
##        0        0        0      106        0        0      106      106      106        0        0 
##     chol  albumin   copper alk.phos      ast     trig platelet  protime    stage 
##      134        0      108      106      106      136       11        2        6

Other examples
Obtain the quadratic term of the vector 1:3
Present the results as a vector

sapply(1:3, function(x) { x^2 } )
## [1] 1 4 9

Create a list that consists of Mat and Mat^2
Obtain the mean of each element
Present the results as a vector

X <- list(Mat, Mat^2)
sapply(X, mean)
## [1]    97.33 12691.67

Select the element in the second row and first column of each list element
Present the results as a vector

# Each matrix x is subset as x[2, 1]: row 2, column 1
sapply(MyList,"[", 2, 1)
## [1] 2 5 9

tapply

Obtain the mean age and time per sex

tapply(pbc$age, pbc$sex, mean)
##        m        f 
## 55.71072 50.15694
tapply(pbc$time, pbc$sex, mean)
##        m        f 
## 1894.023 1920.578

Obtain the mean age and time (both elements of the variables divided by two) per sex

tapply(pbc$age, pbc$sex, function(x) { mean(x/2) } )
##        m        f 
## 27.85536 25.07847
tapply(pbc$time, pbc$sex, function(x) { mean(x/2) } )
##        m        f 
## 947.0114 960.2888

Obtain the mean age and time per sex and status

# A list of two grouping vectors yields a two-way table:
# rows correspond to status, columns to sex
tapply(pbc$age, list(pbc$status, pbc$sex), mean)
##          m        f
## 0 55.68595 49.02203
## 1 42.16290 41.62939
## 2 57.42174 53.30739
tapply(pbc$time, list(pbc$status, pbc$sex), mean)
##          m        f
## 0 2462.176 2322.953
## 1 1147.667 1600.545
## 2 1584.875 1340.504

mapply

Create a list:

  • 1st element: repeats 1 four times
  • 2nd element: repeats 2 three times  
  • 3rd element: repeats 3 two times  
  • 4th element: repeats 4 one time
mapply(rep, 1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4
#### alternative run: list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1))

Create a list:

  • 1st element: repeats 4 one time
  • 2nd element: repeats 4 two times  
  • 3rd element: repeats 4 three times  
  • 4th element: repeats 4 four times
mapply(rep, times = 1:4, x = 4)
## [[1]]
## [1] 4
## 
## [[2]]
## [1] 4 4
## 
## [[3]]
## [1] 4 4 4
## 
## [[4]]
## [1] 4 4 4 4
#### alternative run: list(rep(4, times = 1), rep(4, times = 2), rep(4, times = 3), rep(4, times = 4))

Create a list:

  • 1st element: repeats 1 four times  
  • 2nd element: repeats 2 four times  
  • 3rd element: repeats 3 four times  
  • 4th element: repeats 4 four times
mapply(rep,1:4, 4, SIMPLIFY = FALSE)
## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2 2
## 
## [[3]]
## [1] 3 3 3 3
## 
## [[4]]
## [1] 4 4 4 4
### alternative run: list(rep(1, 4), rep(2, 4), rep(3, 4), rep(4, 4))

Note: if the length is the same we can obtain a simplified output

mapply(rep,1:4, 4, SIMPLIFY = TRUE)
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    1    2    3    4
## [3,]    1    2    3    4
## [4,]    1    2    3    4
### alternative run: matrix(c(rep(1, 4), rep(2, 4), rep(3, 4), rep(4, 4)), 4, 4)

Other examples

mapply(function(x,y) { seq_len(x) + y },
       c(a = 1, b = 2, c = 3),  
       c(A = 10, B = 0, C = -10))
## $a
## [1] 11
## 
## $b
## [1] 1 2
## 
## $c
## [1] -9 -8 -7
#### alternative run: list(c(1) + 10, c(1, 2) + 0, c(1, 2, 3) - 10)

X <- list(Mat, Mat^2)
mapply(mean, X)
## [1]    97.33 12691.67

Note!

mapply(mean, MyList)
## [1] 5.0 9.5 9.0
sapply(MyList, mean)
## [1] 5.0 9.5 9.0
mapply(function(x,y) { x^y }, x = c(2, 3), y = c(4))
## [1] 16 81
#### alternative run: c(2^4, 3^4)

Long format data

Let’s assume that only the long format data set pbcseq is available
We want to obtain the mean serum bilirubin of the last follow-up measurement (specified as day) per status group
Each patient is counted once!

head(pbcseq)
##   id futime status trt      age sex day ascites hepato spiders edema bili chol albumin alk.phos   ast
## 1  1    400      2   1 58.76523   f   0       1      1       1     1 14.5  261    2.60     1718 138.0
## 2  1    400      2   1 58.76523   f 192       1      1       1     1 21.3   NA    2.94     1612   6.2
## 3  2   5169      0   1 56.44627   f   0       0      1       1     0  1.1  302    4.14     7395 113.5
## 4  2   5169      0   1 56.44627   f 182       0      1       1     0  0.8   NA    3.60     2107 139.5
## 5  2   5169      0   1 56.44627   f 365       0      1       1     0  1.0   NA    3.55     1711 144.2
## 6  2   5169      0   1 56.44627   f 768       0      1       1     0  1.9   NA    3.92     1365 144.2
##   platelet protime stage
## 1      190    12.2     4
## 2      183    11.2     4
## 3      221    10.6     3
## 4      188    11.0     3
## 5      161    11.6     3
## 6      122    10.6     3

Sort data

pbcseq <- pbcseq[order(pbcseq$id, pbcseq$day), ]

Select the last follow-up measurement of each patient

# tapply() groups the row names by patient id and keeps the last one in each group;
# since pbcseq is ordered by id and day, that is the row of the patient's latest visit.
# The resulting character vector is then used to index pbcseq by row name.
pbcseq.idNEW2 <- pbcseq[tapply(rownames(pbcseq), pbcseq$id, tail,  1), ]

Step by step

tapply(rownames(pbcseq), pbcseq$id, tail,  1)
##      1      2      3      4      5      6      7      8      9     10     11     12     13     14     15 
##    "2"   "11"   "15"   "22"   "28"   "34"   "41"   "49"   "56"   "57"   "69"   "71"   "83"   "90"  "101" 
##     16     17     18     19     20     21     22     23     24     25     26     27     28     29     30 
##  "114"  "117"  "118"  "133"  "137"  "149"  "152"  "154"  "167"  "179"  "185"  "186"  "189"  "199"  "202" 
##     31     32     33     34     35     36     37     38     39     40     41     42     43     44     45 
##  "214"  "230"  "240"  "254"  "258"  "269"  "271"  "281"  "289"  "304"  "308"  "324"  "339"  "350"  "359" 
##     46     47     48     49     50     51     52     53     54     55     56     57     58     59     60 
##  "367"  "374"  "383"  "387"  "396"  "406"  "415"  "418"  "423"  "429"  "435"  "446"  "462"  "469"  "482" 
##     61     62     63     64     65     66     67     68     69     70     71     72     73     74     75 
##  "496"  "506"  "509"  "515"  "521"  "534"  "544"  "558"  "562"  "575"  "586"  "592"  "607"  "611"  "616" 
##     76     77     78     79     80     81     82     83     84     85     86     87     88     89     90 
##  "617"  "619"  "623"  "634"  "638"  "648"  "658"  "673"  "676"  "687"  "688"  "690"  "694"  "698"  "707" 
##     91     92     93     94     95     96     97     98     99    100    101    102    103    104    105 
##  "710"  "711"  "726"  "729"  "730"  "744"  "748"  "762"  "775"  "779"  "791"  "803"  "804"  "814"  "824" 
##    106    107    108    109    110    111    112    113    114    115    116    117    118    119    120 
##  "829"  "840"  "848"  "858"  "865"  "873"  "884"  "889"  "900"  "912"  "923"  "928"  "935"  "937"  "945" 
##    121    122    123    124    125    126    127    128    129    130    131    132    133    134    135 
##  "946"  "956"  "958"  "959"  "968"  "973"  "984"  "989" "1001" "1007" "1011" "1017" "1026" "1038" "1048" 
##    136    137    138    139    140    141    142    143    144    145    146    147    148    149    150 
## "1057" "1069" "1075" "1081" "1093" "1096" "1105" "1108" "1112" "1116" "1123" "1128" "1134" "1137" "1146" 
##    151    152    153    154    155    156    157    158    159    160    161    162    163    164    165 
## "1157" "1160" "1170" "1171" "1174" "1178" "1189" "1198" "1202" "1210" "1221" "1222" "1225" "1226" "1231" 
##    166    167    168    169    170    171    172    173    174    175    176    177    178    179    180 
## "1239" "1245" "1255" "1260" "1261" "1271" "1278" "1287" "1297" "1307" "1313" "1314" "1320" "1323" "1333" 
##    181    182    183    184    185    186    187    188    189    190    191    192    193    194    195 
## "1334" "1339" "1347" "1352" "1357" "1361" "1365" "1373" "1378" "1382" "1383" "1393" "1397" "1402" "1403" 
##    196    197    198    199    200    201    202    203    204    205    206    207    208    209    210 
## "1411" "1420" "1428" "1431" "1442" "1452" "1454" "1463" "1471" "1476" "1484" "1491" "1494" "1496" "1506" 
##    211    212    213    214    215    216    217    218    219    220    221    222    223    224    225 
## "1514" "1520" "1522" "1526" "1530" "1537" "1541" "1551" "1557" "1562" "1570" "1574" "1575" "1583" "1592" 
##    226    227    228    229    230    231    232    233    234    235    236    237    238    239    240 
## "1601" "1605" "1609" "1612" "1616" "1620" "1624" "1625" "1627" "1629" "1631" "1633" "1636" "1639" "1647" 
##    241    242    243    244    245    246    247    248    249    250    251    252    253    254    255 
## "1651" "1659" "1661" "1665" "1667" "1673" "1677" "1685" "1688" "1690" "1691" "1694" "1702" "1706" "1712" 
##    256    257    258    259    260    261    262    263    264    265    266    267    268    269    270 
## "1718" "1721" "1729" "1734" "1735" "1741" "1744" "1749" "1753" "1757" "1764" "1765" "1767" "1774" "1777" 
##    271    272    273    274    275    276    277    278    279    280    281    282    283    284    285 
## "1785" "1791" "1794" "1799" "1803" "1805" "1810" "1815" "1822" "1829" "1830" "1834" "1838" "1845" "1846" 
##    286    287    288    289    290    291    292    293    294    295    296    297    298    299    300 
## "1848" "1852" "1854" "1858" "1865" "1870" "1873" "1875" "1882" "1884" "1887" "1890" "1893" "1894" "1896" 
##    301    302    303    304    305    306    307    308    309    310    311    312 
## "1899" "1904" "1908" "1909" "1914" "1916" "1921" "1926" "1931" "1936" "1940" "1945"
### alternative run: pbcseq.idNEW2 <- pbcseq[!duplicated(pbcseq[c("id")], fromLast = TRUE), ]

Obtain the mean serum bilirubin per status group

tapply(pbcseq.idNEW2$bili, pbcseq.idNEW2$status, mean)
##         0         1         2 
##  2.190909  9.172414 11.800000

Let’s again assume that only the long format data set pbcseq is available
We want to obtain the mean serum bilirubin at the highest edema value recorded for each patient (if that value was recorded at several visits, select the last follow-up measurement) per status group
Each patient and edema stage is counted once!

head(pbcseq)
##   id futime status trt      age sex day ascites hepato spiders edema bili chol albumin alk.phos   ast
## 1  1    400      2   1 58.76523   f   0       1      1       1     1 14.5  261    2.60     1718 138.0
## 2  1    400      2   1 58.76523   f 192       1      1       1     1 21.3   NA    2.94     1612   6.2
## 3  2   5169      0   1 56.44627   f   0       0      1       1     0  1.1  302    4.14     7395 113.5
## 4  2   5169      0   1 56.44627   f 182       0      1       1     0  0.8   NA    3.60     2107 139.5
## 5  2   5169      0   1 56.44627   f 365       0      1       1     0  1.0   NA    3.55     1711 144.2
## 6  2   5169      0   1 56.44627   f 768       0      1       1     0  1.9   NA    3.92     1365 144.2
##   platelet protime stage
## 1      190    12.2     4
## 2      183    11.2     4
## 3      221    10.6     3
## 4      188    11.0     3
## 5      161    11.6     3
## 6      122    10.6     3

Sort data

# Within each patient, rows are ordered by edema value first and by day second,
# so the last row per id holds the patient's largest edema value and,
# among rows sharing that value, the latest day
pbcseq <- pbcseq[order(pbcseq$id, pbcseq$edema, pbcseq$day), ]

Select the last stage of edema of each patient

# Keep, for every patient id, the last row name in the current row order
# and use those row names to subset pbcseq (one row per patient)
pbcseq.idNEW3 <- pbcseq[tapply(rownames(pbcseq), pbcseq$id, tail,  1), ]

Step by step

tapply(rownames(pbcseq), pbcseq$id, tail,  1)
##      1      2      3      4      5      6      7      8      9     10     11     12     13     14     15 
##    "2"   "11"   "15"   "22"   "27"   "34"   "41"   "49"   "56"   "57"   "69"   "71"   "83"   "87"  "101" 
##     16     17     18     19     20     21     22     23     24     25     26     27     28     29     30 
##  "114"  "117"  "118"  "119"  "137"  "149"  "152"  "154"  "167"  "179"  "185"  "186"  "189"  "199"  "202" 
##     31     32     33     34     35     36     37     38     39     40     41     42     43     44     45 
##  "214"  "230"  "240"  "251"  "257"  "268"  "271"  "281"  "288"  "304"  "308"  "324"  "339"  "348"  "359" 
##     46     47     48     49     50     51     52     53     54     55     56     57     58     59     60 
##  "367"  "374"  "383"  "387"  "396"  "406"  "415"  "418"  "423"  "429"  "435"  "446"  "462"  "469"  "481" 
##     61     62     63     64     65     66     67     68     69     70     71     72     73     74     75 
##  "496"  "506"  "507"  "515"  "521"  "534"  "544"  "558"  "562"  "575"  "584"  "592"  "607"  "611"  "612" 
##     76     77     78     79     80     81     82     83     84     85     86     87     88     89     90 
##  "617"  "619"  "623"  "634"  "638"  "648"  "658"  "673"  "676"  "687"  "688"  "690"  "694"  "698"  "707" 
##     91     92     93     94     95     96     97     98     99    100    101    102    103    104    105 
##  "709"  "711"  "725"  "729"  "730"  "744"  "746"  "762"  "766"  "779"  "791"  "803"  "804"  "814"  "821" 
##    106    107    108    109    110    111    112    113    114    115    116    117    118    119    120 
##  "827"  "840"  "848"  "858"  "865"  "873"  "881"  "889"  "900"  "912"  "914"  "928"  "935"  "937"  "945" 
##    121    122    123    124    125    126    127    128    129    130    131    132    133    134    135 
##  "946"  "956"  "958"  "959"  "968"  "973"  "981"  "989" "1001" "1005" "1011" "1017" "1026" "1038" "1048" 
##    136    137    138    139    140    141    142    143    144    145    146    147    148    149    150 
## "1057" "1069" "1075" "1081" "1093" "1096" "1105" "1108" "1112" "1116" "1123" "1124" "1134" "1135" "1146" 
##    151    152    153    154    155    156    157    158    159    160    161    162    163    164    165 
## "1157" "1160" "1170" "1171" "1174" "1177" "1182" "1198" "1202" "1210" "1221" "1222" "1225" "1226" "1231" 
##    166    167    168    169    170    171    172    173    174    175    176    177    178    179    180 
## "1239" "1245" "1255" "1260" "1261" "1271" "1278" "1287" "1297" "1307" "1313" "1314" "1320" "1323" "1330" 
##    181    182    183    184    185    186    187    188    189    190    191    192    193    194    195 
## "1334" "1337" "1347" "1349" "1357" "1361" "1365" "1373" "1378" "1382" "1383" "1392" "1397" "1402" "1403" 
##    196    197    198    199    200    201    202    203    204    205    206    207    208    209    210 
## "1411" "1420" "1428" "1431" "1442" "1449" "1454" "1463" "1471" "1476" "1484" "1491" "1494" "1495" "1506" 
##    211    212    213    214    215    216    217    218    219    220    221    222    223    224    225 
## "1514" "1515" "1522" "1526" "1530" "1537" "1541" "1551" "1557" "1562" "1570" "1574" "1575" "1583" "1592" 
##    226    227    228    229    230    231    232    233    234    235    236    237    238    239    240 
## "1601" "1605" "1609" "1612" "1616" "1620" "1624" "1625" "1626" "1629" "1631" "1633" "1636" "1639" "1647" 
##    241    242    243    244    245    246    247    248    249    250    251    252    253    254    255 
## "1651" "1659" "1661" "1665" "1667" "1672" "1677" "1685" "1688" "1690" "1691" "1694" "1701" "1706" "1712" 
##    256    257    258    259    260    261    262    263    264    265    266    267    268    269    270 
## "1718" "1721" "1729" "1733" "1735" "1740" "1744" "1749" "1753" "1757" "1763" "1765" "1767" "1774" "1777" 
##    271    272    273    274    275    276    277    278    279    280    281    282    283    284    285 
## "1782" "1791" "1794" "1799" "1803" "1805" "1806" "1815" "1822" "1829" "1830" "1834" "1838" "1845" "1846" 
##    286    287    288    289    290    291    292    293    294    295    296    297    298    299    300 
## "1848" "1852" "1853" "1858" "1865" "1870" "1873" "1874" "1882" "1884" "1887" "1890" "1893" "1894" "1896" 
##    301    302    303    304    305    306    307    308    309    310    311    312 
## "1899" "1904" "1908" "1909" "1914" "1916" "1921" "1926" "1931" "1936" "1940" "1945"
### alternative run: pbcseq.idNEW3 <- pbcseq[!duplicated(pbcseq[c("id")], fromLast = TRUE), ]

Obtain the mean serum bilirubin per status group

tapply(pbcseq.idNEW3$bili, pbcseq.idNEW3$status, mean)
##         0         1         2 
##  2.182517  8.651724 11.425000