A new article created using the Distill format.
packages = c('tidyverse', 'patchwork', 'dplyr', 'lubridate', 'reshape2', 'ggthemes',
'knitr', 'zoo', 'ggplot2', 'plotly', 'scales', 'directlabels')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
data <- read_csv("data/FinancialJournal.csv")
dim(data)
[1] 1856330 4
data_orderBypart <- data[order(data$participantId),]
| participantId | timestamp | category | amount |
|---|---|---|---|
| 0 | 2022-03-01 00:00:00 | Wage | 2472.507559 |
| 0 | 2022-03-01 00:00:00 | Shelter | -554.988622 |
| 0 | 2022-03-01 00:00:00 | Education | -38.005380 |
| 0 | 2022-03-01 00:00:00 | Shelter | -554.988622 |
| 0 | 2022-03-01 00:00:00 | Education | -38.005380 |
| 0 | 2022-03-01 08:25:00 | Wage | 4.284167 |
We can see there are some duplicates for Shelter and Education.
distinct function in dplyr will be used to remove duplicates
data_clean <- data %>% distinct(participantId, timestamp, category, amount,
.keep_all = TRUE)
| participantId | timestamp | category | amount |
|---|---|---|---|
| 0 | 2022-03-01 00:00:00 | Wage | 2472.507559 |
| 0 | 2022-03-01 00:00:00 | Shelter | -554.988622 |
| 0 | 2022-03-01 00:00:00 | Education | -38.005380 |
| 0 | 2022-03-01 08:25:00 | Wage | 4.284167 |
| 0 | 2022-03-01 08:50:00 | Food | -4.173345 |
| 0 | 2022-03-01 16:25:00 | Wage | 385.575003 |
dim(data_clean)
[1] 1855217 4
There are 1113 duplicates.
data_clean <- data_clean %>%
mutate(yearmonth = as.yearmon(timestamp))
data_tidy <- data_tidy %>%
spread(category, amount)
data_tidy <- data_tidy %>%
mutate(CostofLiving = select(., Education, Food, Shelter) %>% rowSums(na.rm = TRUE))
| yearmonth | Education | Food | Recreation | RentAdjustment | Shelter | Wage | CostofLiving |
|---|---|---|---|---|---|---|---|
| Mar 2022 | 14353.84 | 327843.1 | 649590.7 | 53503.488 | 631623.1 | 6402720 | 973820.1 |
| Apr 2022 | 11423.44 | 304297.1 | 389694.9 | 1429.244 | 559918.6 | 3468756 | 875639.1 |
| May 2022 | 11423.44 | 313555.6 | 336418.1 | NA | 558450.7 | 3623068 | 883429.8 |
| Jun 2022 | 11423.44 | 302909.2 | 314808.3 | NA | 558450.7 | 3608883 | 872783.3 |
| Jul 2022 | 11423.44 | 313819.4 | 329613.2 | NA | 558450.7 | 3485799 | 883693.5 |
| Aug 2022 | 11423.44 | 312841.3 | 306962.6 | NA | 558450.7 | 3763807 | 882715.5 |
data_tidy2 <- data_tidy[c(-2,-3,-5,-6)]
data_tidy3 <- melt(data_tidy2, id.vars = 'yearmonth')
head(data_tidy3)
yearmonth variable value
1 Mar 2022 Recreation 649590.7
2 Apr 2022 Recreation 389694.9
3 May 2022 Recreation 336418.1
4 Jun 2022 Recreation 314808.3
5 Jul 2022 Recreation 329613.2
6 Aug 2022 Recreation 306962.6
rename(data_tidy3, category = variable, amount = value)
yearmonth category amount
1 Mar 2022 Recreation 649590.7
2 Apr 2022 Recreation 389694.9
3 May 2022 Recreation 336418.1
4 Jun 2022 Recreation 314808.3
5 Jul 2022 Recreation 329613.2
6 Aug 2022 Recreation 306962.6
7 Sep 2022 Recreation 301910.3
8 Oct 2022 Recreation 326722.3
9 Nov 2022 Recreation 288290.5
10 Dec 2022 Recreation 316620.8
11 Jan 2023 Recreation 311579.8
12 Feb 2023 Recreation 271126.5
13 Mar 2023 Recreation 295210.4
14 Apr 2023 Recreation 310294.3
15 May 2023 Recreation 224251.5
16 Mar 2022 Wage 6402719.8
17 Apr 2022 Wage 3468756.2
18 May 2022 Wage 3623067.6
19 Jun 2022 Wage 3608882.6
20 Jul 2022 Wage 3485799.0
21 Aug 2022 Wage 3763807.4
22 Sep 2022 Wage 3610503.6
23 Oct 2022 Wage 3483352.2
24 Nov 2022 Wage 3607155.9
25 Dec 2022 Wage 3627931.7
26 Jan 2023 Wage 3623061.0
27 Feb 2023 Wage 3292643.3
28 Mar 2023 Wage 3767956.4
29 Apr 2023 Wage 3326739.0
30 May 2023 Wage 2940646.0
31 Mar 2022 CostofLiving 973820.1
32 Apr 2022 CostofLiving 875639.1
33 May 2022 CostofLiving 883429.8
34 Jun 2022 CostofLiving 872783.3
35 Jul 2022 CostofLiving 883693.5
36 Aug 2022 CostofLiving 882715.5
37 Sep 2022 CostofLiving 872752.2
38 Oct 2022 CostofLiving 883451.4
39 Nov 2022 CostofLiving 872721.3
40 Dec 2022 CostofLiving 882976.9
41 Jan 2023 CostofLiving 883146.1
42 Feb 2023 CostofLiving 852352.0
43 Mar 2023 CostofLiving 882848.6
44 Apr 2023 CostofLiving 873691.0
45 May 2023 CostofLiving 812179.8
p1 <- ggplot(data_tidy3, aes(x = yearmonth, y = value, colour = variable)) +
geom_line(aes(linetype = variable), lwd = 1.2) +
scale_color_manual(values = c("darkred", "steelblue", "orange4"))+
scale_y_continuous(labels = scales::comma)+
ylab("Amount")+theme_economist()+theme(legend.position="none")#+
#geom_dl(aes(label = variable), method = list(dl.trans(x = x + .2), "last.points"))
ggplotly(p1, tooltip = c('yearmonth', 'value'))