How subtotal a dataframe and also grandtotal
Problem Description:
I’m looking for a way to summarize my dataframe into a summary table that will have the total sum of each observation and the entire sum in percentage and also the total count and total count in percentage and each having a subgroup of total and also Grand total of each observation.
here is my sample dataset below.
struct<- samples_stack <- dput(samples[1:50,])
structure(list(Merchant = c("Fat", "Fat", "United", "WAVE", "Fat",
"Fat", "Fat", "Fat", "Fat", "Fat", "WAVE", "WAVE", "WAVE", "WAVE",
"Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat",
"Fat", "Fat", "Fat", "WAVE", "WAVE", "WAVE", "WAVE", "WAVE",
"Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat",
"Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat",
"Fat"), Network = c("G", "G", "G", "X9", "G", "G", "M", "M",
"M", "M", "M", "M", "M", "M", "G", "G", "G", "G", "A", "A", "A",
"A", "G", "G", "G", "G", "A", "A", "A", "A", "A", "G", "G", "G",
"G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G",
"G", "G", "G"), Type = c("Airtime", "Airtime", "Airtime", "Airtime",
"Airtime", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime",
"Airtime", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime",
"Data", "Airtime", "Data", "Airtime", "Airtime", "Data", "Airtime",
"Data", "Data", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime",
"Airtime", "Airtime", "Data", "Airtime", "Airtime", "Airtime",
"Data", "Airtime", "Airtime", "Data", "Data", "Airtime", "Airtime",
"Data", "Data", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime"
), FaceValue = c(200, 200, 2000, 100, 100, 100, 100, 300, 500,
300, 240, 600, 400, 600, 100, 500, 130, 200, 500, 100, 100, 300,
150, 300, 300, 2000, 200, 240, 600, 400, 400, 500, 250, 100,
100, 500, 450, 50, 1300, 1400, 1400, 2000, 100, 130, 130, 100,
100, 200, 200, 600), Date = c("2022-12-03 23:37:23", "2022-12-03 22:45:52",
"2022-12-03 06:58:19", "2022-12-03 14:06:28", "2022-12-03 22:19:13",
"2022-12-03 22:15:39", "2022-12-03 20:02:12", "2022-12-03 20:01:07",
"2022-12-03 19:14:25", "2022-12-03 14:47:35", "2022-12-03 23:12:40",
"2022-12-03 23:09:18", "2022-12-03 22:57:57", "2022-12-03 22:51:16",
"2022-12-03 21:23:38", "2022-12-03 21:19:43", "2022-12-03 21:03:38",
"2022-12-03 20:57:44", "2022-12-03 22:51:07", "2022-12-03 22:26:50",
"2022-12-03 21:57:09", "2022-12-03 21:53:54", "2022-12-03 20:20:21",
"2022-12-03 20:13:07", "2022-12-03 20:10:30", "2022-12-03 19:50:21",
"2022-12-03 01:28:35", "2022-12-03 01:17:59", "2022-12-03 00:35:08",
"2022-12-03 00:31:56", "2022-12-03 00:11:25", "2022-12-03 18:36:51",
"2022-12-03 17:56:25", "2022-12-03 17:10:15", "2022-12-03 16:49:27",
"2022-12-03 16:45:03", "2022-12-03 16:43:26", "2022-12-03 16:37:55",
"2022-12-03 16:36:11", "2022-12-03 16:14:40", "2022-12-03 16:03:10",
"2022-12-03 16:02:56", "2022-12-03 15:32:37", "2022-12-03 15:30:45",
"2022-12-03 15:14:05", "2022-12-03 15:13:24", "2022-12-03 15:09:10",
"2022-12-03 12:20:58", "2022-12-03 11:54:15", "2022-12-03 11:36:53"
), Status = c("Processing", "Transaction Declined", "Successful",
"Processing", "Processing", "Transaction Declined", "Processing",
"Processing", "Processing", "Processing", "Processing", "Processing",
"Processing", "Processing", "Transaction Declined", "Transaction Declined",
"Processing", "Transaction Declined", "Transaction Declined",
"Transaction Declined", "Transaction Declined", "Transaction Declined",
"Transaction Declined", "Processing", "Processing", "Processing",
"Processing", "Processing", "Processing", "Processing", "Processing",
"Transaction Declined", "Processing", "Transaction Declined",
"Transaction Declined", "Transaction Declined", "Processing",
"Transaction Declined", "Transaction Declined", "Processing",
"Processing", "Transaction Declined", "Transaction Declined",
"Processing", "Processing", "Transaction Declined", "Transaction Declined",
"Transaction Declined", "Transaction Declined", "Transaction Declined"
), Discount... = c("6.00", "6.00", "5.00", "6.00", "6.00", "6.00",
"3.70", "3.70", "3.70", "3.70", "3.20", "3.20", "3.20", "3.20",
"6.00", "6.00", "6.00", "6.00", "3.50", "3.50", "3.50", "3.50",
"6.00", "6.00", "6.00", "6.00", "3.50", "3.50", "3.50", "3.50",
"3.50", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00",
"6.00", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00",
"6.00", "6.00", "6.00", "6.00"), Discount.NGN. = c("12.00", "12.00",
"100.00", "6.00", "6.00", "6.00", "3.70", "11.10", "18.50", "11.10",
"7.68", "19.20", "12.80", "19.20", "6.00", "30.00", "7.80", "12.00",
"17.50", "3.50", "3.50", "10.50", "9.00", "18.00", "18.00", "120.00",
"7.00", "8.40", "21.00", "14.00", "14.00", "30.00", "15.00",
"6.00", "6.00", "30.00", "27.00", "3.00", "78.00", "84.00", "84.00",
"120.00", "6.00", "7.80", "7.80", "6.00", "6.00", "12.00", "12.00",
"36.00"), Network.reformat = c("G", "G", "G-s", "X9", "G", "G",
"NEW", "NEW", "NEW", "NEW", "NEW",
"NEW", "NEW", "NEW", "G", "G", "G", "G", "A",
"A", "A", "A", "G", "G", "G", "G", "A", "A", "A", "A", "A", "G",
"G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G",
"G", "G", "G", "G", "G")), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
I want my table to look like the table below.
Solution – 1
Use grouped summarize()
and mutate()
to get initial values, then do additional summaries within bind_rows()
to add subtotals and totals. Finally, use forcats::fct_relevel()
inside dplyr::arrange()
to put everything in order.
library(dplyr)
library(forcats)
samples_stack %>%
group_by(Network, Merchant) %>%
summarize(
Count_Merchant = n(),
Sum_FaceValue = sum(FaceValue),
.groups = "drop"
) %>%
mutate(
Pct_Merchant = Count_Merchant / sum(Count_Merchant),
Pct_FaceValue = Sum_FaceValue / sum(Sum_FaceValue)
) %>%
bind_rows(
summarize(
group_by(., Network),
Merchant = "SUBTOTAL",
across(Count_Merchant:Pct_FaceValue, sum)
),
summarize(
.,
across(Network:Merchant, ~ "TOTAL"),
across(Count_Merchant:Pct_FaceValue, sum)
)
) %>%
arrange(
fct_relevel(Network, "TOTAL", after = Inf),
fct_relevel(Merchant, "SUBTOTAL")
)
# A tibble: 12 × 6
Network Merchant Count_Merchant Sum_FaceValue Pct_Merchant Pct_FaceValue
<chr> <chr> <int> <dbl> <dbl> <dbl>
1 A SUBTOTAL 9 2840 0.18 0.130
2 A Fat 4 1000 0.08 0.0457
3 A WAVE 5 1840 0.1 0.0841
4 G SUBTOTAL 32 15890 0.64 0.727
5 G Fat 31 13890 0.62 0.635
6 G United 1 2000 0.02 0.0914
7 M SUBTOTAL 8 3040 0.16 0.139
8 M Fat 4 1200 0.08 0.0549
9 M WAVE 4 1840 0.08 0.0841
10 X9 SUBTOTAL 1 100 0.02 0.00457
11 X9 WAVE 1 100 0.02 0.00457
12 TOTAL TOTAL 50 21870 1 1