R grouping and summarizing but conditional up to a given date, for each date

Here is my dataframe

structure(list(Date = structure(c(19083, 19083, 19083, 19083, 
19083, 19083, 19083, 19083, 19083, 19083, 19083, 19083, 19084, 
19084, 19084, 19084, 19084, 19084, 19084, 19084, 19084, 19084, 
19084, 19084, 19085, 19085, 19085, 19085, 19085, 19085, 19085, 
19085, 19085, 19085, 19085, 19085, 19086, 19086, 19086, 19086, 
19086, 19086, 19086, 19086, 19086, 19086, 19086, 19086, 19087, 
19087, 19087, 19087, 19087, 19087, 19087, 19087, 19087, 19087, 
19087, 19087, 19088, 19088, 19088, 19088, 19088, 19088, 19088, 
19088, 19088, 19088, 19088, 19088, 19089, 19089, 19089, 19089, 
19089, 19089, 19089, 19089, 19089, 19089, 19089, 19089, 19090, 
19090, 19090, 19090, 19090, 19090, 19090, 19090, 19090, 19090, 
19090, 19090, 19091, 19091, 19091, 19091, 19091, 19091, 19091, 
19091, 19091, 19091, 19091, 19091, 19092, 19092, 19092, 19092, 
19092, 19092, 19092, 19092, 19092, 19092, 19092, 19092, 19093, 
19093, 19093, 19093, 19093, 19093, 19093, 19093, 19093, 19093, 
19093, 19093, 19094, 19094, 19094, 19094, 19094, 19094, 19094, 
19094, 19094, 19094, 19094, 19094, 19095, 19095, 19095, 19095, 
19095, 19095, 19095, 19095, 19095, 19095, 19095, 19095), class = "Date"), 
    Day_of_Week = c("Friday", "Friday", "Friday", "Friday", "Friday", 
    "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", 
    "Friday", "Saturday", "Saturday", "Saturday", "Saturday", 
    "Saturday", "Saturday", "Saturday", "Saturday", "Saturday", 
    "Saturday", "Saturday", "Saturday", "Sunday", "Sunday", "Sunday", 
    "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", 
    "Sunday", "Sunday", "Sunday", "Monday", "Monday", "Monday", 
    "Monday", "Monday", "Monday", "Monday", "Monday", "Monday", 
    "Monday", "Monday", "Monday", "Tuesday", "Tuesday", "Tuesday", 
    "Tuesday", "Tuesday", "Tuesday", "Tuesday", "Tuesday", "Tuesday", 
    "Tuesday", "Tuesday", "Tuesday", "Wednesday", "Wednesday", 
    "Wednesday", "Wednesday", "Wednesday", "Wednesday", "Wednesday", 
    "Wednesday", "Wednesday", "Wednesday", "Wednesday", "Wednesday", 
    "Thursday", "Thursday", "Thursday", "Thursday", "Thursday", 
    "Thursday", "Thursday", "Thursday", "Thursday", "Thursday", 
    "Thursday", "Thursday", "Friday", "Friday", "Friday", "Friday", 
    "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", 
    "Friday", "Friday", "Saturday", "Saturday", "Saturday", "Saturday", 
    "Saturday", "Saturday", "Saturday", "Saturday", "Saturday", 
    "Saturday", "Saturday", "Saturday", "Sunday", "Sunday", "Sunday", 
    "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", 
    "Sunday", "Sunday", "Sunday", "Monday", "Monday", "Monday", 
    "Monday", "Monday", "Monday", "Monday", "Monday", "Monday", 
    "Monday", "Monday", "Monday", "Tuesday", "Tuesday", "Tuesday", 
    "Tuesday", "Tuesday", "Tuesday", "Tuesday", "Tuesday", "Tuesday", 
    "Tuesday", "Tuesday", "Tuesday", "Wednesday", "Wednesday", 
    "Wednesday", "Wednesday", "Wednesday", "Wednesday", "Wednesday", 
    "Wednesday", "Wednesday", "Wednesday", "Wednesday", "Wednesday"
    ), Hour = c("10am", "11am", "12pm", "1pm", "2pm", "3pm", 
    "4pm", "5pm", "6pm", "7pm", "8pm", "9pm", "10am", "11am", 
    "12pm", "1pm", "2pm", "3pm", "4pm", "5pm", "6pm", "7pm", 
    "8pm", "9pm", "10am", "11am", "12pm", "1pm", "2pm", "3pm", 
    "4pm", "5pm", "6pm", "7pm", "8pm", "9pm", "10am", "11am", 
    "12pm", "1pm", "2pm", "3pm", "4pm", "5pm", "6pm", "7pm", 
    "8pm", "9pm", "10am", "11am", "12pm", "1pm", "2pm", "3pm", 
    "4pm", "5pm", "6pm", "7pm", "8pm", "9pm", "10am", "11am", 
    "12pm", "1pm", "2pm", "3pm", "4pm", "5pm", "6pm", "7pm", 
    "8pm", "9pm", "10am", "11am", "12pm", "1pm", "2pm", "3pm", 
    "4pm", "5pm", "6pm", "7pm", "8pm", "9pm", "10am", "11am", 
    "12pm", "1pm", "2pm", "3pm", "4pm", "5pm", "6pm", "7pm", 
    "8pm", "9pm", "10am", "11am", "12pm", "1pm", "2pm", "3pm", 
    "4pm", "5pm", "6pm", "7pm", "8pm", "9pm", "10am", "11am", 
    "12pm", "1pm", "2pm", "3pm", "4pm", "5pm", "6pm", "7pm", 
    "8pm", "9pm", "10am", "11am", "12pm", "1pm", "2pm", "3pm", 
    "4pm", "5pm", "6pm", "7pm", "8pm", "9pm", "10am", "11am", 
    "12pm", "1pm", "2pm", "3pm", "4pm", "5pm", "6pm", "7pm", 
    "8pm", "9pm", "10am", "11am", "12pm", "1pm", "2pm", "3pm", 
    "4pm", "5pm", "6pm", "7pm", "8pm", "9pm"), Date_Time = structure(c(1648807200, 
    1648810800, 1648814400, 1648818000, 1648821600, 1648825200, 
    1648828800, 1648832400, 1648836000, 1648839600, 1648843200, 
    1648846800, 1648893600, 1648897200, 1648900800, 1648904400, 
    1648908000, 1648911600, 1648915200, 1648918800, 1648922400, 
    1648926000, 1648929600, 1648933200, 1648980000, 1648983600, 
    1648987200, 1648990800, 1648994400, 1648998000, 1649001600, 
    1649005200, 1649008800, 1649012400, 1649016000, 1649019600, 
    1649066400, 1649070000, 1649073600, 1649077200, 1649080800, 
    1649084400, 1649088000, 1649091600, 1649095200, 1649098800, 
    1649102400, 1649106000, 1649152800, 1649156400, 1649160000, 
    1649163600, 1649167200, 1649170800, 1649174400, 1649178000, 
    1649181600, 1649185200, 1649188800, 1649192400, 1649239200, 
    1649242800, 1649246400, 1649250000, 1649253600, 1649257200, 
    1649260800, 1649264400, 1649268000, 1649271600, 1649275200, 
    1649278800, 1649325600, 1649329200, 1649332800, 1649336400, 
    1649340000, 1649343600, 1649347200, 1649350800, 1649354400, 
    1649358000, 1649361600, 1649365200, 1649412000, 1649415600, 
    1649419200, 1649422800, 1649426400, 1649430000, 1649433600, 
    1649437200, 1649440800, 1649444400, 1649448000, 1649451600, 
    1649498400, 1649502000, 1649505600, 1649509200, 1649512800, 
    1649516400, 1649520000, 1649523600, 1649527200, 1649530800, 
    1649534400, 1649538000, 1649584800, 1649588400, 1649592000, 
    1649595600, 1649599200, 1649602800, 1649606400, 1649610000, 
    1649613600, 1649617200, 1649620800, 1649624400, 1649671200, 
    1649674800, 1649678400, 1649682000, 1649685600, 1649689200, 
    1649692800, 1649696400, 1649700000, 1649703600, 1649707200, 
    1649710800, 1649757600, 1649761200, 1649764800, 1649768400, 
    1649772000, 1649775600, 1649779200, 1649782800, 1649786400, 
    1649790000, 1649793600, 1649797200, 1649844000, 1649847600, 
    1649851200, 1649854800, 1649858400, 1649862000, 1649865600, 
    1649869200, 1649872800, 1649876400, 1649880000, 1649883600
    ), tzone = "UTC", class = c("POSIXct", "POSIXt")), Sales = c(0, 
    307470, 507533, 728256, 959485, 1200713, 1423500, 1640529, 
    1850180, 2046532, 2221283, 2382291.91, 0, 285560, 486728, 
    701417, 943416, 1185898, 1395710, 1604240, 1793172, 1962999, 
    2113823, 2232717, 0, 158881, 326370, 494727, 676647, 850244, 
    1021835, 1167815, 1302997, 1421208, 1542626, 1542626, 0, 
    241132, 407313, 584655, 780747, 983805, 1185358, 1374598, 
    1569981, 1751983, 1918465, 2040527, 0, 266487, 422864, 593228, 
    771899, 951790, 1129720, 1293674, 1464121, 1624755, 1777559, 
    1895461, 0, 275884, 456859, 652361, 860750, 1063475, 1256408, 
    1449684, 1656636, 1838144.25984455, 2003701, 2139876, 0, 
    259078, 419011, 588710, 766509, 940493, 1109202, 1283608, 
    1467065, 1631987, 1773653, 1892431, 0, 304141, 476994, 668898, 
    802665, 875109, 1094131, 1471062, 1733966, 1985945, 2225504, 
    2450605, 0, 449053, 697820, 952431, 1210898, 1464442, 1707353, 
    1946528, 2146538, 2333322, 2515627, 2674651, 0, 191057, 398300, 
    621097, 859606, 1102018, 1338608, 1547151, 1735105, 1909331, 
    2076719, 2076719, 0, 249006, 413848, 589790, 776803, 971293, 
    1154215, 1341453, 1532779, 1704763, 1845413, 1975962, 0, 
    242039, 395044, 555791, 710396, 891304, 1055436, 1217639, 
    1380435, 1536080, 1665876, 1781080, 0, 288190, 464692, 652759, 
    853812, 1067162, 1259819, 1451037, 1652630, 1842384, 2013646, 
    2143242)), row.names = c(NA, -156L), class = c("tbl_df", 
"tbl", "data.frame"))

And here is the output of the glimpse() function.

Rows: 156
Columns: 5
$ Date        <date> 2022-04-01, 2022-04-01, 2022-04-01, 2022-04-01, 2022-04-01, 2022-04-01, 2022-04-01, 2022-04-01…
$ Day_of_Week <chr> "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", "Frid…
$ Hour        <chr> "10am", "11am", "12pm", "1pm", "2pm", "3pm", "4pm", "5pm", "6pm", "7pm", "8pm", "9pm", "10am", …
$ Date_Time   <dttm> 2022-04-01 10:00:00, 2022-04-01 11:00:00, 2022-04-01 12:00:00, 2022-04-01 13:00:00, 2022-04-01…
$ Sales       <dbl> 0, 307470, 507533, 728256, 959485, 1200713, 1423500, 1640529, 1850180, 2046532, 2221283, 238229…

I’m trying to figure out the average orders that come through for each day and each hour grouped together, but only up to and not including the current day.

Here is the code that I have so far. It creates an output that shows the average orders that come in on a Friday at 10am, and the average orders that come in on Friday at 11am, etc.

dataframe3 %>%
# some kind of filter on the date column? 
  group_by(Day_of_Week, Hour) %>%
  summarize(Average_Orders_Date_and_Hour = mean(Sales)) %>%
  mutate(Percent_of_Total = Average_Orders_Date_and_Hour / max(Average_Orders_Date_and_Hour))

What I’m really looking for, though, is a way of versioning. The output I’m hoping for is a much longer dataframe that would show the result of this calculation up to a given day.

The code that I created compiles OK. It creates a dataframe that has 84 rows. But I need a dataframe that has 84 rows for each date in the dataframe. For the date 2020-06-01, it would group the day or week and the hour, then calculate the average for everything previous.

Leave a Comment