Take-home Exercise 3

A new article created using the Distill format.

Chu Yi Ning https://www.linkedin.com/public-profile/settings?trk=d_flagship3_profile_self_view_public_profile (SMU SCIS)https://scis.smu.edu.sg/
2022-05-16

Overview

Getting Started

packages = c('tidyverse', 'patchwork', 'dplyr', 'lubridate', 'reshape2', 'ggthemes',
             'knitr', 'zoo', 'ggplot2', 'plotly', 'scales', 'directlabels')
for(p in packages){
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p, character.only = T)
}

Data Cleaning

data <- read_csv("data/FinancialJournal.csv")
dim(data)
[1] 1856330       4

Order by Participant ID

data_orderBypart <- data[order(data$participantId),]
participantId timestamp category amount
0 2022-03-01 00:00:00 Wage 2472.507559
0 2022-03-01 00:00:00 Shelter -554.988622
0 2022-03-01 00:00:00 Education -38.005380
0 2022-03-01 00:00:00 Shelter -554.988622
0 2022-03-01 00:00:00 Education -38.005380
0 2022-03-01 08:25:00 Wage 4.284167

We can see there are some duplicates for Shelter and Education.

distinct function in dplyr will be used to remove duplicates

data_clean <- data %>% distinct(participantId, timestamp, category, amount,
                   .keep_all = TRUE)
participantId timestamp category amount
0 2022-03-01 00:00:00 Wage 2472.507559
0 2022-03-01 00:00:00 Shelter -554.988622
0 2022-03-01 00:00:00 Education -38.005380
0 2022-03-01 08:25:00 Wage 4.284167
0 2022-03-01 08:50:00 Food -4.173345
0 2022-03-01 16:25:00 Wage 385.575003
dim(data_clean)
[1] 1855217       4

There are 1113 duplicates.

Add a new column Month which extract month from timestamp

data_clean <- data_clean %>%
  mutate(yearmonth = as.yearmon(timestamp))
data_tidy <- data_clean %>% 
              group_by(yearmonth, category) %>%
              summarise(amount = abs(sum(amount)),
                             .groups = 'drop')
data_tidy <- data_tidy %>%
  spread(category, amount)
data_tidy <- data_tidy %>%
  mutate(CostofLiving = select(., Education, Food, Shelter) %>% rowSums(na.rm = TRUE))
yearmonth Education Food Recreation RentAdjustment Shelter Wage CostofLiving
Mar 2022 14353.84 327843.1 649590.7 53503.488 631623.1 6402720 973820.1
Apr 2022 11423.44 304297.1 389694.9 1429.244 559918.6 3468756 875639.1
May 2022 11423.44 313555.6 336418.1 NA 558450.7 3623068 883429.8
Jun 2022 11423.44 302909.2 314808.3 NA 558450.7 3608883 872783.3
Jul 2022 11423.44 313819.4 329613.2 NA 558450.7 3485799 883693.5
Aug 2022 11423.44 312841.3 306962.6 NA 558450.7 3763807 882715.5
data_tidy2 <- data_tidy[c(-2,-3,-5,-6)]
data_tidy3 <- melt(data_tidy2, id.vars = 'yearmonth')
head(data_tidy3)
  yearmonth   variable    value
1  Mar 2022 Recreation 649590.7
2  Apr 2022 Recreation 389694.9
3  May 2022 Recreation 336418.1
4  Jun 2022 Recreation 314808.3
5  Jul 2022 Recreation 329613.2
6  Aug 2022 Recreation 306962.6
rename(data_tidy3, category = variable, amount = value)
   yearmonth     category    amount
1   Mar 2022   Recreation  649590.7
2   Apr 2022   Recreation  389694.9
3   May 2022   Recreation  336418.1
4   Jun 2022   Recreation  314808.3
5   Jul 2022   Recreation  329613.2
6   Aug 2022   Recreation  306962.6
7   Sep 2022   Recreation  301910.3
8   Oct 2022   Recreation  326722.3
9   Nov 2022   Recreation  288290.5
10  Dec 2022   Recreation  316620.8
11  Jan 2023   Recreation  311579.8
12  Feb 2023   Recreation  271126.5
13  Mar 2023   Recreation  295210.4
14  Apr 2023   Recreation  310294.3
15  May 2023   Recreation  224251.5
16  Mar 2022         Wage 6402719.8
17  Apr 2022         Wage 3468756.2
18  May 2022         Wage 3623067.6
19  Jun 2022         Wage 3608882.6
20  Jul 2022         Wage 3485799.0
21  Aug 2022         Wage 3763807.4
22  Sep 2022         Wage 3610503.6
23  Oct 2022         Wage 3483352.2
24  Nov 2022         Wage 3607155.9
25  Dec 2022         Wage 3627931.7
26  Jan 2023         Wage 3623061.0
27  Feb 2023         Wage 3292643.3
28  Mar 2023         Wage 3767956.4
29  Apr 2023         Wage 3326739.0
30  May 2023         Wage 2940646.0
31  Mar 2022 CostofLiving  973820.1
32  Apr 2022 CostofLiving  875639.1
33  May 2022 CostofLiving  883429.8
34  Jun 2022 CostofLiving  872783.3
35  Jul 2022 CostofLiving  883693.5
36  Aug 2022 CostofLiving  882715.5
37  Sep 2022 CostofLiving  872752.2
38  Oct 2022 CostofLiving  883451.4
39  Nov 2022 CostofLiving  872721.3
40  Dec 2022 CostofLiving  882976.9
41  Jan 2023 CostofLiving  883146.1
42  Feb 2023 CostofLiving  852352.0
43  Mar 2023 CostofLiving  882848.6
44  Apr 2023 CostofLiving  873691.0
45  May 2023 CostofLiving  812179.8
p1 <- ggplot(data_tidy3, aes(x = yearmonth, y = value, colour = variable)) + 
  geom_line(aes(linetype = variable), lwd = 1.2) + 
  scale_color_manual(values = c("darkred", "steelblue",  "orange4"))+
  scale_y_continuous(labels = scales::comma)+
  ylab("Amount")+theme_economist()+theme(legend.position="none")#+
  #geom_dl(aes(label = variable), method = list(dl.trans(x = x + .2), "last.points"))

ggplotly(p1, tooltip = c('yearmonth', 'value'))