How subtotal a dataframe and also grandtotal

How subtotal a dataframe and also grandtotal

Problem Description:

I’m looking for a way to summarize my dataframe into a summary table that will have the total sum of each observation and the entire sum in percentage and also the total count and total count in percentage and each having a subgroup of total and also Grand total of each observation.

here is my sample dataset below.

struct<- samples_stack <- dput(samples[1:50,])
structure(list(Merchant = c("Fat", "Fat", "United", "WAVE", "Fat", 
                            "Fat", "Fat", "Fat", "Fat", "Fat", "WAVE", "WAVE", "WAVE", "WAVE", 
                            "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", 
                            "Fat", "Fat", "Fat", "WAVE", "WAVE", "WAVE", "WAVE", "WAVE", 
                            "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", 
                            "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", "Fat", 
                            "Fat"), Network = c("G", "G", "G", "X9", "G", "G", "M", "M", 
                                                "M", "M", "M", "M", "M", "M", "G", "G", "G", "G", "A", "A", "A", 
                                                "A", "G", "G", "G", "G", "A", "A", "A", "A", "A", "G", "G", "G", 
                                                "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", 
                                                "G", "G", "G"), Type = c("Airtime", "Airtime", "Airtime", "Airtime", 
                                                                         "Airtime", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime", 
                                                                         "Airtime", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime", 
                                                                         "Data", "Airtime", "Data", "Airtime", "Airtime", "Data", "Airtime", 
                                                                         "Data", "Data", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime", 
                                                                         "Airtime", "Airtime", "Data", "Airtime", "Airtime", "Airtime", 
                                                                         "Data", "Airtime", "Airtime", "Data", "Data", "Airtime", "Airtime", 
                                                                         "Data", "Data", "Airtime", "Airtime", "Airtime", "Airtime", "Airtime"
                                                ), FaceValue = c(200, 200, 2000, 100, 100, 100, 100, 300, 500, 
                                                                 300, 240, 600, 400, 600, 100, 500, 130, 200, 500, 100, 100, 300, 
                                                                 150, 300, 300, 2000, 200, 240, 600, 400, 400, 500, 250, 100, 
                                                                 100, 500, 450, 50, 1300, 1400, 1400, 2000, 100, 130, 130, 100, 
                                                                 100, 200, 200, 600), Date = c("2022-12-03 23:37:23", "2022-12-03 22:45:52", 
                                                                                               "2022-12-03 06:58:19", "2022-12-03 14:06:28", "2022-12-03 22:19:13", 
                                                                                               "2022-12-03 22:15:39", "2022-12-03 20:02:12", "2022-12-03 20:01:07", 
                                                                                               "2022-12-03 19:14:25", "2022-12-03 14:47:35", "2022-12-03 23:12:40", 
                                                                                               "2022-12-03 23:09:18", "2022-12-03 22:57:57", "2022-12-03 22:51:16", 
                                                                                               "2022-12-03 21:23:38", "2022-12-03 21:19:43", "2022-12-03 21:03:38", 
                                                                                               "2022-12-03 20:57:44", "2022-12-03 22:51:07", "2022-12-03 22:26:50", 
                                                                                               "2022-12-03 21:57:09", "2022-12-03 21:53:54", "2022-12-03 20:20:21", 
                                                                                               "2022-12-03 20:13:07", "2022-12-03 20:10:30", "2022-12-03 19:50:21", 
                                                                                               "2022-12-03 01:28:35", "2022-12-03 01:17:59", "2022-12-03 00:35:08", 
                                                                                               "2022-12-03 00:31:56", "2022-12-03 00:11:25", "2022-12-03 18:36:51", 
                                                                                               "2022-12-03 17:56:25", "2022-12-03 17:10:15", "2022-12-03 16:49:27", 
                                                                                               "2022-12-03 16:45:03", "2022-12-03 16:43:26", "2022-12-03 16:37:55", 
                                                                                               "2022-12-03 16:36:11", "2022-12-03 16:14:40", "2022-12-03 16:03:10", 
                                                                                               "2022-12-03 16:02:56", "2022-12-03 15:32:37", "2022-12-03 15:30:45", 
                                                                                               "2022-12-03 15:14:05", "2022-12-03 15:13:24", "2022-12-03 15:09:10", 
                                                                                               "2022-12-03 12:20:58", "2022-12-03 11:54:15", "2022-12-03 11:36:53"
                                                                 ), Status = c("Processing", "Transaction Declined", "Successful", 
                                                                               "Processing", "Processing", "Transaction Declined", "Processing", 
                                                                               "Processing", "Processing", "Processing", "Processing", "Processing", 
                                                                               "Processing", "Processing", "Transaction Declined", "Transaction Declined", 
                                                                               "Processing", "Transaction Declined", "Transaction Declined", 
                                                                               "Transaction Declined", "Transaction Declined", "Transaction Declined", 
                                                                               "Transaction Declined", "Processing", "Processing", "Processing", 
                                                                               "Processing", "Processing", "Processing", "Processing", "Processing", 
                                                                               "Transaction Declined", "Processing", "Transaction Declined", 
                                                                               "Transaction Declined", "Transaction Declined", "Processing", 
                                                                               "Transaction Declined", "Transaction Declined", "Processing", 
                                                                               "Processing", "Transaction Declined", "Transaction Declined", 
                                                                               "Processing", "Processing", "Transaction Declined", "Transaction Declined", 
                                                                               "Transaction Declined", "Transaction Declined", "Transaction Declined"
                                                                 ), Discount... = c("6.00", "6.00", "5.00", "6.00", "6.00", "6.00", 
                                                                                    "3.70", "3.70", "3.70", "3.70", "3.20", "3.20", "3.20", "3.20", 
                                                                                    "6.00", "6.00", "6.00", "6.00", "3.50", "3.50", "3.50", "3.50", 
                                                                                    "6.00", "6.00", "6.00", "6.00", "3.50", "3.50", "3.50", "3.50", 
                                                                                    "3.50", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00", 
                                                                                    "6.00", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00", "6.00", 
                                                                                    "6.00", "6.00", "6.00", "6.00"), Discount.NGN. = c("12.00", "12.00", 
                                                                                                                                       "100.00", "6.00", "6.00", "6.00", "3.70", "11.10", "18.50", "11.10", 
                                                                                                                                       "7.68", "19.20", "12.80", "19.20", "6.00", "30.00", "7.80", "12.00", 
                                                                                                                                       "17.50", "3.50", "3.50", "10.50", "9.00", "18.00", "18.00", "120.00", 
                                                                                                                                       "7.00", "8.40", "21.00", "14.00", "14.00", "30.00", "15.00", 
                                                                                                                                       "6.00", "6.00", "30.00", "27.00", "3.00", "78.00", "84.00", "84.00", 
                                                                                                                                       "120.00", "6.00", "7.80", "7.80", "6.00", "6.00", "12.00", "12.00", 
                                                                                                                                       "36.00"), Network.reformat = c("G", "G", "G-s", "X9", "G", "G", 
                                                                                                                                                                      "NEW", "NEW", "NEW", "NEW", "NEW", 
                                                                                                                                                                      "NEW", "NEW", "NEW", "G", "G", "G", "G", "A", 
                                                                                                                                                                      "A", "A", "A", "G", "G", "G", "G", "A", "A", "A", "A", "A", "G", 
                                                                                                                                                                      "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", 
                                                                                                                                                                      "G", "G", "G", "G", "G")), row.names = c(NA, -50L), class = c("tbl_df", 
                                                                                                                                                                                                                                    "tbl", "data.frame"))

I want my table to look like the table below.

enter image description here

Solution – 1

Use grouped summarize() and mutate() to get initial values, then do additional summaries within bind_rows() to add subtotals and totals. Finally, use forcats::fct_relevel() inside dplyr::arrange() to put everything in order.

library(dplyr)
library(forcats)

samples_stack %>%
  group_by(Network, Merchant) %>%
  summarize(
    Count_Merchant = n(), 
    Sum_FaceValue = sum(FaceValue),
    .groups = "drop"
  ) %>%
  mutate(
    Pct_Merchant = Count_Merchant / sum(Count_Merchant),
    Pct_FaceValue = Sum_FaceValue / sum(Sum_FaceValue)
  ) %>%
  bind_rows(
    summarize(
      group_by(., Network),
      Merchant = "SUBTOTAL",
      across(Count_Merchant:Pct_FaceValue, sum)
    ),
    summarize(
      .,
      across(Network:Merchant, ~ "TOTAL"),
      across(Count_Merchant:Pct_FaceValue, sum)
    )
  ) %>%
  arrange(
    fct_relevel(Network, "TOTAL", after = Inf),
    fct_relevel(Merchant, "SUBTOTAL")
  )
# A tibble: 12 × 6
   Network Merchant Count_Merchant Sum_FaceValue Pct_Merchant Pct_FaceValue
   <chr>   <chr>             <int>         <dbl>        <dbl>         <dbl>
 1 A       SUBTOTAL              9          2840         0.18       0.130  
 2 A       Fat                   4          1000         0.08       0.0457 
 3 A       WAVE                  5          1840         0.1        0.0841 
 4 G       SUBTOTAL             32         15890         0.64       0.727  
 5 G       Fat                  31         13890         0.62       0.635  
 6 G       United                1          2000         0.02       0.0914 
 7 M       SUBTOTAL              8          3040         0.16       0.139  
 8 M       Fat                   4          1200         0.08       0.0549 
 9 M       WAVE                  4          1840         0.08       0.0841 
10 X9      SUBTOTAL              1           100         0.02       0.00457
11 X9      WAVE                  1           100         0.02       0.00457
12 TOTAL   TOTAL                50         21870         1          1      
Rate this post
We use cookies in order to give you the best possible experience on our website. By continuing to use this site, you agree to our use of cookies.
Accept
Reject