California energy data
ca_energy_generation.csv
and ca_energy_imports.csv
## 'data.frame': 168 obs. of 12 variables:
## $ datetime : chr "2019-09-03 00:00:00" "2019-09-03 01:00:00" "2019-09-03 02:00:00" "2019-09-03 03:00:00" ...
## $ biogas : num 239 239 239 239 238 ...
## $ biomass : num 407 406 407 407 406 ...
## $ coal : num 9 9 9 9.83 9.42 ...
## $ geothermal : num 967 970 973 974 975 ...
## $ large_hydro: num 3152 2724 2630 2611 2651 ...
## $ natural_gas: num 9711 9155 8078 7801 7744 ...
## $ nuclear : num 2249 2251 2251 2245 2243 ...
## $ other : int 0 0 0 0 0 0 0 0 0 0 ...
## $ small_hydro: num 402 401 414 360 382 ...
## $ solar : num 0 0 0 0 0 ...
## $ wind : num 2670 2779 2795 2583 2397 ...
## [1] "character"
## [1] "character"
lubridate
lubridate
packageas_datetime
function
readr::read_csv
is that it will often detect and convert datetime variables when importinglibrary(lubridate)
generation$datetime <- as_datetime(generation$datetime)
class(generation$datetime)
## [1] "POSIXct" "POSIXt"
## [1] "2019-09-03 00:00:00 UTC" "2019-09-03 01:00:00 UTC"
## [3] "2019-09-03 02:00:00 UTC" "2019-09-03 03:00:00 UTC"
## [5] "2019-09-03 04:00:00 UTC" "2019-09-03 05:00:00 UTC"
lubridate
## [1] "POSIXct" "POSIXt"
## [1] "2019-09-03 00:00:00 UTC" "2019-09-03 01:00:00 UTC"
## [3] "2019-09-03 02:00:00 UTC" "2019-09-03 03:00:00 UTC"
## [5] "2019-09-03 04:00:00 UTC" "2019-09-03 05:00:00 UTC"
## datetime biogas biomass coal geothermal large_hydro
## 1 2019-09-03 00:00:00 238.9167 407.0833 9.000000 967.2500 3152.500
## 2 2019-09-03 01:00:00 239.0000 406.5000 9.000000 970.3333 2724.333
## 3 2019-09-03 02:00:00 239.0000 406.8333 9.000000 973.1667 2630.000
## 4 2019-09-03 03:00:00 238.9167 407.1667 9.833333 973.8333 2611.250
## 5 2019-09-03 04:00:00 237.9167 406.0833 9.416667 974.6667 2651.333
## 6 2019-09-03 05:00:00 237.1667 408.9167 10.000000 973.3333 2965.500
## natural_gas nuclear other small_hydro solar wind
## 1 9710.833 2248.750 0 401.6667 0 2670.167
## 2 9155.000 2250.583 0 400.9167 0 2778.833
## 3 8078.333 2250.833 0 413.5833 0 2795.250
## 4 7801.250 2245.000 0 359.9167 0 2582.583
## 5 7744.333 2242.833 0 382.0833 0 2397.417
## 6 8624.583 2242.833 0 395.9167 0 2327.000
reshape2
melt
–> make data longdcast
–> make data widerecast
–> melt then cast datageneration
dataframe has several observations per row## datetime biogas biomass coal geothermal large_hydro
## 1 2019-09-03 00:00:00 238.9167 407.0833 9.000000 967.2500 3152.500
## 2 2019-09-03 01:00:00 239.0000 406.5000 9.000000 970.3333 2724.333
## 3 2019-09-03 02:00:00 239.0000 406.8333 9.000000 973.1667 2630.000
## 4 2019-09-03 03:00:00 238.9167 407.1667 9.833333 973.8333 2611.250
## 5 2019-09-03 04:00:00 237.9167 406.0833 9.416667 974.6667 2651.333
## 6 2019-09-03 05:00:00 237.1667 408.9167 10.000000 973.3333 2965.500
## natural_gas nuclear other small_hydro solar wind
## 1 9710.833 2248.750 0 401.6667 0 2670.167
## 2 9155.000 2250.583 0 400.9167 0 2778.833
## 3 8078.333 2250.833 0 413.5833 0 2795.250
## 4 7801.250 2245.000 0 359.9167 0 2582.583
## 5 7744.333 2242.833 0 382.0833 0 2397.417
## 6 8624.583 2242.833 0 395.9167 0 2327.000
melt
the generation datamelt(df, id.vars = "id")
id.vars
library(reshape2)
long_gen <- melt(generation, id.vars = "datetime",
variable.name = "source",
value.name = "usage")
head(long_gen)
## datetime source usage
## 1 2019-09-03 00:00:00 biogas 238.9167
## 2 2019-09-03 01:00:00 biogas 239.0000
## 3 2019-09-03 02:00:00 biogas 239.0000
## 4 2019-09-03 03:00:00 biogas 238.9167
## 5 2019-09-03 04:00:00 biogas 237.9167
## 6 2019-09-03 05:00:00 biogas 237.1667
melt
the generation data## datetime source usage
## 1 2019-09-03 biogas 238.9167
## 169 2019-09-03 biomass 407.0833
## 337 2019-09-03 coal 9.0000
## 505 2019-09-03 geothermal 967.2500
## 673 2019-09-03 large_hydro 3152.5000
## 841 2019-09-03 natural_gas 9710.8333
merge
merge(x, y, by = c("id", "year"))
x
: first dataframey
: second dataframeby
: variables to match (must have common name)merge
argumentsby.x
and by.y
if the dataframes have different variable namesall.x = T
if you want to keep all the observation in the first dataframe (unmatched observations in y
are dropped!)all.y = T
if you want to keep all observations in the second dataframe (umatched observations in x
are dropped!)all = T
) to keep all observations!datetime
merge
to join the generation
and imports
dataframes, using the datetime
variable to matchdatetime
## [1] 168 13
## datetime biogas biomass coal geothermal large_hydro
## 1 2019-09-03 00:00:00 238.9167 407.0833 9.000000 967.2500 3152.500
## 2 2019-09-03 01:00:00 239.0000 406.5000 9.000000 970.3333 2724.333
## 3 2019-09-03 02:00:00 239.0000 406.8333 9.000000 973.1667 2630.000
## 4 2019-09-03 03:00:00 238.9167 407.1667 9.833333 973.8333 2611.250
## 5 2019-09-03 04:00:00 237.9167 406.0833 9.416667 974.6667 2651.333
## 6 2019-09-03 05:00:00 237.1667 408.9167 10.000000 973.3333 2965.500
## natural_gas nuclear other small_hydro solar wind imports
## 1 9710.833 2248.750 0 401.6667 0 2670.167 7527.333
## 2 9155.000 2250.583 0 400.9167 0 2778.833 6930.333
## 3 8078.333 2250.833 0 413.5833 0 2795.250 7005.833
## 4 7801.250 2245.000 0 359.9167 0 2582.583 7080.583
## 5 7744.333 2242.833 0 382.0833 0 2397.417 7421.667
## 6 8624.583 2242.833 0 395.9167 0 2327.000 7511.667
long_merged_energy
long_merged_energy <- melt(merged_energy, id.vars = "datetime",
variable.name = "source",
value.name = "usage")
head(long_merged_energy)
## datetime source usage
## 1 2019-09-03 00:00:00 biogas 238.9167
## 2 2019-09-03 01:00:00 biogas 239.0000
## 3 2019-09-03 02:00:00 biogas 239.0000
## 4 2019-09-03 03:00:00 biogas 238.9167
## 5 2019-09-03 04:00:00 biogas 237.9167
## 6 2019-09-03 05:00:00 biogas 237.1667
dplyr
, and data.table
, are two packages used to process data in tabular formdplyr
version and a data.table
versiondplyr
: chain together a few, simple functionsdata.table
: compact, syntax-based commandsdata.table
is much faster on large data (dplyr
is explicitly meant for small-to-medium data)dplyr
is an R package for data manipulation developed by Hadley Wickham with help from lots of other R developerstidyverse
dplyr
verbsselect
–> subset variablesfilter
–> subset observations based on conditionsmutate
–> add new variablessummarize
–> reduce multiple observations to a single value (e.g., find the mean)dplyr
verbs - select
select(gapminder, continent, pop)
select(df, c(1, 3, 10))
select(df, country:pop)
or select(df, 1:3)
-
select(df, -gdpPercap)
dplyr
verbs - select
## [1] "biogas" "biomass" "geothermal" "solar"
## [1] "datetime" "coal" "large_hydro" "natural_gas" "nuclear"
## [6] "other" "small_hydro" "wind" "imports"
one_of()
, contains()
, starts_with()
, ends_with()
, matches()
## [1] "large_hydro" "small_hydro" "biogas" "biomass"
filter
## [1] 78
## datetime biogas biomass coal geothermal large_hydro
## 1 2019-09-03 00:00:00 238.9167 407.0833 9.000000 967.2500 3152.500
## 2 2019-09-03 02:00:00 239.0000 406.8333 9.000000 973.1667 2630.000
## 3 2019-09-03 03:00:00 238.9167 407.1667 9.833333 973.8333 2611.250
## 4 2019-09-03 04:00:00 237.9167 406.0833 9.416667 974.6667 2651.333
## 5 2019-09-03 05:00:00 237.1667 408.9167 10.000000 973.3333 2965.500
## 6 2019-09-03 06:00:00 236.7500 406.0833 9.166667 972.3333 3809.500
## natural_gas nuclear other small_hydro solar wind imports
## 1 9710.833 2248.750 0 401.6667 0.00000 2670.167 7527.333
## 2 8078.333 2250.833 0 413.5833 0.00000 2795.250 7005.833
## 3 7801.250 2245.000 0 359.9167 0.00000 2582.583 7080.583
## 4 7744.333 2242.833 0 382.0833 0.00000 2397.417 7421.667
## 5 8624.583 2242.833 0 395.9167 0.00000 2327.000 7511.667
## 6 9544.583 2244.833 0 407.5000 77.16667 2048.833 8172.667
filter
## [1] 20
## datetime biogas biomass coal geothermal large_hydro
## 1 2019-09-07 03:00:00 240.1667 358.9167 9.166667 964.7500 2413.833
## 2 2019-09-07 06:00:00 238.8333 371.3333 8.500000 971.7500 2649.417
## 3 2019-09-08 00:00:00 236.8333 389.7500 5.416667 970.2500 2697.000
## 4 2019-09-08 01:00:00 237.2500 388.8333 4.750000 969.3333 2654.083
## 5 2019-09-08 02:00:00 237.1667 374.3333 4.083333 969.9167 2217.667
## 6 2019-09-08 03:00:00 237.8333 342.5000 4.000000 972.5000 2090.917
## natural_gas nuclear other small_hydro solar wind imports
## 1 6667.417 2255.083 0 378.4167 0.00000 3809.917 7129.667
## 2 6553.917 2257.083 0 408.5000 67.83333 3643.750 7338.250
## 3 6364.333 2251.167 0 403.7500 0.00000 3628.083 8077.583
## 4 5037.917 2254.000 0 403.3333 0.00000 3118.083 8658.583
## 5 4530.750 2255.583 0 385.0833 0.00000 2928.083 8806.583
## 6 4079.500 2257.500 0 377.5000 0.00000 2726.583 8887.167
mutate
mutate
creates new variables## datetime source usage log_usage
## 1 2019-09-03 00:00:00 biogas 238.9167 5.476115
## 2 2019-09-03 01:00:00 biogas 239.0000 5.476464
## 3 2019-09-03 02:00:00 biogas 239.0000 5.476464
## 4 2019-09-03 03:00:00 biogas 238.9167 5.476115
## 5 2019-09-03 04:00:00 biogas 237.9167 5.471920
## 6 2019-09-03 05:00:00 biogas 237.1667 5.468763
mutate
tmp <- mutate(long_merged_energy, log_usage = log(usage), usage2 = usage^2, usage3 = usage^3)
head(tmp)
## datetime source usage log_usage usage2 usage3
## 1 2019-09-03 00:00:00 biogas 238.9167 5.476115 57081.17 13637644
## 2 2019-09-03 01:00:00 biogas 239.0000 5.476464 57121.00 13651919
## 3 2019-09-03 02:00:00 biogas 239.0000 5.476464 57121.00 13651919
## 4 2019-09-03 03:00:00 biogas 238.9167 5.476115 57081.17 13637644
## 5 2019-09-03 04:00:00 biogas 237.9167 5.471920 56604.34 13467116
## 6 2019-09-03 05:00:00 biogas 237.1667 5.468763 56248.03 13340157
summarize
summarize
reduces observations to a single value based on functions - mean
, sum
, sd
, min
, max
, etc.
## total
## 1 5223392
## mean_cons
## 1 2590.968
%>%
%>%
operator lets you chain together functions%>%
as “then”# take df then filter it then select these variables
# you do not need to repeat the name of the dataframe!
long_merged_energy %>%
filter(source == "geothermal") %>%
select(-datetime) %>%
mutate(log_usage = log(usage)) %>%
summarize(mean_log_usage = mean(log_usage, na.rm = T))
## mean_log_usage
## 1 6.872053
%>%
## datetime source usage
## 1 2019-09-03 00:00:00 geothermal 967.2500
## 2 2019-09-03 01:00:00 geothermal 970.3333
## 3 2019-09-03 02:00:00 geothermal 973.1667
## 4 2019-09-03 03:00:00 geothermal 973.8333
## 5 2019-09-03 04:00:00 geothermal 974.6667
## 6 2019-09-03 05:00:00 geothermal 973.3333
## 7 2019-09-03 06:00:00 geothermal 972.3333
## 8 2019-09-03 07:00:00 geothermal 972.0833
## 9 2019-09-03 08:00:00 geothermal 968.5000
## 10 2019-09-03 09:00:00 geothermal 963.6667
## 11 2019-09-03 10:00:00 geothermal 959.0833
## 12 2019-09-03 11:00:00 geothermal 954.4167
## 13 2019-09-03 12:00:00 geothermal 944.5833
## 14 2019-09-03 13:00:00 geothermal 944.0833
## 15 2019-09-03 14:00:00 geothermal 943.9167
## 16 2019-09-03 15:00:00 geothermal 950.6667
## 17 2019-09-03 16:00:00 geothermal 955.9167
## 18 2019-09-03 17:00:00 geothermal 953.0833
## 19 2019-09-03 18:00:00 geothermal 950.9167
## 20 2019-09-03 19:00:00 geothermal 950.7500
## 21 2019-09-03 20:00:00 geothermal 951.0000
## 22 2019-09-03 21:00:00 geothermal 956.2500
## 23 2019-09-03 22:00:00 geothermal 959.8333
## 24 2019-09-03 23:00:00 geothermal 962.9167
## 25 2019-09-04 00:00:00 geothermal 966.0833
## 26 2019-09-04 01:00:00 geothermal 966.3333
## 27 2019-09-04 02:00:00 geothermal 967.0833
## 28 2019-09-04 03:00:00 geothermal 966.9167
## 29 2019-09-04 04:00:00 geothermal 967.4167
## 30 2019-09-04 05:00:00 geothermal 967.7500
## 31 2019-09-04 06:00:00 geothermal 966.1667
## 32 2019-09-04 07:00:00 geothermal 964.5000
## 33 2019-09-04 08:00:00 geothermal 960.0833
## 34 2019-09-04 09:00:00 geothermal 954.9167
## 35 2019-09-04 10:00:00 geothermal 953.8333
## 36 2019-09-04 11:00:00 geothermal 951.2500
## 37 2019-09-04 12:00:00 geothermal 948.6667
## 38 2019-09-04 13:00:00 geothermal 947.4167
## 39 2019-09-04 14:00:00 geothermal 951.6667
## 40 2019-09-04 15:00:00 geothermal 952.5833
## 41 2019-09-04 16:00:00 geothermal 954.0833
## 42 2019-09-04 17:00:00 geothermal 954.0000
## 43 2019-09-04 18:00:00 geothermal 956.7500
## 44 2019-09-04 19:00:00 geothermal 958.8333
## 45 2019-09-04 20:00:00 geothermal 962.6667
## 46 2019-09-04 21:00:00 geothermal 965.9167
## 47 2019-09-04 22:00:00 geothermal 969.0833
## 48 2019-09-04 23:00:00 geothermal 970.8333
## 49 2019-09-05 00:00:00 geothermal 970.6667
## 50 2019-09-05 01:00:00 geothermal 969.9167
## 51 2019-09-05 02:00:00 geothermal 970.9167
## 52 2019-09-05 03:00:00 geothermal 973.2500
## 53 2019-09-05 04:00:00 geothermal 975.4167
## 54 2019-09-05 05:00:00 geothermal 976.9167
## 55 2019-09-05 06:00:00 geothermal 976.0833
## 56 2019-09-05 07:00:00 geothermal 976.5833
## 57 2019-09-05 08:00:00 geothermal 973.2500
## 58 2019-09-05 09:00:00 geothermal 968.6667
## 59 2019-09-05 10:00:00 geothermal 964.6667
## 60 2019-09-05 11:00:00 geothermal 961.5000
## 61 2019-09-05 12:00:00 geothermal 960.0000
## 62 2019-09-05 13:00:00 geothermal 956.4167
## 63 2019-09-05 14:00:00 geothermal 955.1667
## 64 2019-09-05 15:00:00 geothermal 957.6667
## 65 2019-09-05 16:00:00 geothermal 957.5000
## 66 2019-09-05 17:00:00 geothermal 953.7500
## 67 2019-09-05 18:00:00 geothermal 951.6667
## 68 2019-09-05 19:00:00 geothermal 951.6667
## 69 2019-09-05 20:00:00 geothermal 954.0000
## 70 2019-09-05 21:00:00 geothermal 959.6667
## 71 2019-09-05 22:00:00 geothermal 965.9167
## 72 2019-09-05 23:00:00 geothermal 968.4167
## 73 2019-09-06 00:00:00 geothermal 969.1667
## 74 2019-09-06 01:00:00 geothermal 970.4167
## 75 2019-09-06 02:00:00 geothermal 972.5833
## 76 2019-09-06 03:00:00 geothermal 973.3333
## 77 2019-09-06 04:00:00 geothermal 974.2500
## 78 2019-09-06 05:00:00 geothermal 976.5000
## 79 2019-09-06 06:00:00 geothermal 976.5833
## 80 2019-09-06 07:00:00 geothermal 975.7500
## 81 2019-09-06 08:00:00 geothermal 971.9167
## 82 2019-09-06 09:00:00 geothermal 968.0833
## 83 2019-09-06 10:00:00 geothermal 965.5000
## 84 2019-09-06 11:00:00 geothermal 962.3333
## 85 2019-09-06 12:00:00 geothermal 958.0833
## 86 2019-09-06 13:00:00 geothermal 955.6667
## 87 2019-09-06 14:00:00 geothermal 952.4167
## 88 2019-09-06 15:00:00 geothermal 950.6667
## 89 2019-09-06 16:00:00 geothermal 953.0833
## 90 2019-09-06 17:00:00 geothermal 954.3333
## 91 2019-09-06 18:00:00 geothermal 957.5833
## 92 2019-09-06 19:00:00 geothermal 960.8333
## 93 2019-09-06 20:00:00 geothermal 958.3333
## 94 2019-09-06 21:00:00 geothermal 959.9167
## 95 2019-09-06 22:00:00 geothermal 961.9167
## 96 2019-09-06 23:00:00 geothermal 964.5833
## 97 2019-09-07 00:00:00 geothermal 963.7500
## 98 2019-09-07 01:00:00 geothermal 965.1667
## 99 2019-09-07 02:00:00 geothermal 964.7500
## 100 2019-09-07 03:00:00 geothermal 964.7500
## 101 2019-09-07 04:00:00 geothermal 969.5000
## 102 2019-09-07 05:00:00 geothermal 970.6667
## 103 2019-09-07 06:00:00 geothermal 971.7500
## 104 2019-09-07 07:00:00 geothermal 972.0833
## 105 2019-09-07 08:00:00 geothermal 969.1667
## 106 2019-09-07 09:00:00 geothermal 964.9167
## 107 2019-09-07 10:00:00 geothermal 963.4167
## 108 2019-09-07 11:00:00 geothermal 961.3333
## 109 2019-09-07 12:00:00 geothermal 958.4167
## 110 2019-09-07 13:00:00 geothermal 955.3333
## 111 2019-09-07 14:00:00 geothermal 956.5000
## 112 2019-09-07 15:00:00 geothermal 956.4167
## 113 2019-09-07 16:00:00 geothermal 956.6667
## 114 2019-09-07 17:00:00 geothermal 956.1667
## 115 2019-09-07 18:00:00 geothermal 960.6667
## 116 2019-09-07 19:00:00 geothermal 963.1667
## 117 2019-09-07 20:00:00 geothermal 963.9167
## 118 2019-09-07 21:00:00 geothermal 964.5000
## 119 2019-09-07 22:00:00 geothermal 968.1667
## 120 2019-09-07 23:00:00 geothermal 971.0000
## 121 2019-09-08 00:00:00 geothermal 970.2500
## 122 2019-09-08 01:00:00 geothermal 969.3333
## 123 2019-09-08 02:00:00 geothermal 969.9167
## 124 2019-09-08 03:00:00 geothermal 972.5000
## 125 2019-09-08 04:00:00 geothermal 973.0000
## 126 2019-09-08 05:00:00 geothermal 972.5000
## 127 2019-09-08 06:00:00 geothermal 973.3333
## 128 2019-09-08 07:00:00 geothermal 975.0833
## 129 2019-09-08 08:00:00 geothermal 974.2500
## 130 2019-09-08 09:00:00 geothermal 970.8333
## 131 2019-09-08 10:00:00 geothermal 968.0833
## 132 2019-09-08 11:00:00 geothermal 965.3333
## 133 2019-09-08 12:00:00 geothermal 964.2500
## 134 2019-09-08 13:00:00 geothermal 961.7500
## 135 2019-09-08 14:00:00 geothermal 961.6667
## 136 2019-09-08 15:00:00 geothermal 961.0833
## 137 2019-09-08 16:00:00 geothermal 959.6667
## 138 2019-09-08 17:00:00 geothermal 959.5000
## 139 2019-09-08 18:00:00 geothermal 960.5000
## 140 2019-09-08 19:00:00 geothermal 963.6667
## 141 2019-09-08 20:00:00 geothermal 965.7500
## 142 2019-09-08 21:00:00 geothermal 966.7500
## 143 2019-09-08 22:00:00 geothermal 967.8333
## 144 2019-09-08 23:00:00 geothermal 970.3333
## 145 2019-09-09 00:00:00 geothermal 971.9167
## 146 2019-09-09 01:00:00 geothermal 974.1667
## 147 2019-09-09 02:00:00 geothermal 977.1667
## 148 2019-09-09 03:00:00 geothermal 977.1667
## 149 2019-09-09 04:00:00 geothermal 977.8333
## 150 2019-09-09 05:00:00 geothermal 977.5833
## 151 2019-09-09 06:00:00 geothermal 977.9167
## 152 2019-09-09 07:00:00 geothermal 978.5833
## 153 2019-09-09 08:00:00 geothermal 975.8333
## 154 2019-09-09 09:00:00 geothermal 970.8333
## 155 2019-09-09 10:00:00 geothermal 967.3333
## 156 2019-09-09 11:00:00 geothermal 965.2500
## 157 2019-09-09 12:00:00 geothermal 965.0833
## 158 2019-09-09 13:00:00 geothermal 964.2500
## 159 2019-09-09 14:00:00 geothermal 968.8333
## 160 2019-09-09 15:00:00 geothermal 969.0000
## 161 2019-09-09 16:00:00 geothermal 970.7500
## 162 2019-09-09 17:00:00 geothermal 974.6667
## 163 2019-09-09 18:00:00 geothermal 975.9167
## 164 2019-09-09 19:00:00 geothermal 978.5000
## 165 2019-09-09 20:00:00 geothermal 979.7500
## 166 2019-09-09 21:00:00 geothermal 980.5833
## 167 2019-09-09 22:00:00 geothermal 980.5833
## 168 2019-09-09 23:00:00 geothermal 980.3333
%>%
## source usage
## 1 geothermal 967.2500
## 2 geothermal 970.3333
## 3 geothermal 973.1667
## 4 geothermal 973.8333
## 5 geothermal 974.6667
## 6 geothermal 973.3333
## 7 geothermal 972.3333
## 8 geothermal 972.0833
## 9 geothermal 968.5000
## 10 geothermal 963.6667
## 11 geothermal 959.0833
## 12 geothermal 954.4167
## 13 geothermal 944.5833
## 14 geothermal 944.0833
## 15 geothermal 943.9167
## 16 geothermal 950.6667
## 17 geothermal 955.9167
## 18 geothermal 953.0833
## 19 geothermal 950.9167
## 20 geothermal 950.7500
## 21 geothermal 951.0000
## 22 geothermal 956.2500
## 23 geothermal 959.8333
## 24 geothermal 962.9167
## 25 geothermal 966.0833
## 26 geothermal 966.3333
## 27 geothermal 967.0833
## 28 geothermal 966.9167
## 29 geothermal 967.4167
## 30 geothermal 967.7500
## 31 geothermal 966.1667
## 32 geothermal 964.5000
## 33 geothermal 960.0833
## 34 geothermal 954.9167
## 35 geothermal 953.8333
## 36 geothermal 951.2500
## 37 geothermal 948.6667
## 38 geothermal 947.4167
## 39 geothermal 951.6667
## 40 geothermal 952.5833
## 41 geothermal 954.0833
## 42 geothermal 954.0000
## 43 geothermal 956.7500
## 44 geothermal 958.8333
## 45 geothermal 962.6667
## 46 geothermal 965.9167
## 47 geothermal 969.0833
## 48 geothermal 970.8333
## 49 geothermal 970.6667
## 50 geothermal 969.9167
## 51 geothermal 970.9167
## 52 geothermal 973.2500
## 53 geothermal 975.4167
## 54 geothermal 976.9167
## 55 geothermal 976.0833
## 56 geothermal 976.5833
## 57 geothermal 973.2500
## 58 geothermal 968.6667
## 59 geothermal 964.6667
## 60 geothermal 961.5000
## 61 geothermal 960.0000
## 62 geothermal 956.4167
## 63 geothermal 955.1667
## 64 geothermal 957.6667
## 65 geothermal 957.5000
## 66 geothermal 953.7500
## 67 geothermal 951.6667
## 68 geothermal 951.6667
## 69 geothermal 954.0000
## 70 geothermal 959.6667
## 71 geothermal 965.9167
## 72 geothermal 968.4167
## 73 geothermal 969.1667
## 74 geothermal 970.4167
## 75 geothermal 972.5833
## 76 geothermal 973.3333
## 77 geothermal 974.2500
## 78 geothermal 976.5000
## 79 geothermal 976.5833
## 80 geothermal 975.7500
## 81 geothermal 971.9167
## 82 geothermal 968.0833
## 83 geothermal 965.5000
## 84 geothermal 962.3333
## 85 geothermal 958.0833
## 86 geothermal 955.6667
## 87 geothermal 952.4167
## 88 geothermal 950.6667
## 89 geothermal 953.0833
## 90 geothermal 954.3333
## 91 geothermal 957.5833
## 92 geothermal 960.8333
## 93 geothermal 958.3333
## 94 geothermal 959.9167
## 95 geothermal 961.9167
## 96 geothermal 964.5833
## 97 geothermal 963.7500
## 98 geothermal 965.1667
## 99 geothermal 964.7500
## 100 geothermal 964.7500
## 101 geothermal 969.5000
## 102 geothermal 970.6667
## 103 geothermal 971.7500
## 104 geothermal 972.0833
## 105 geothermal 969.1667
## 106 geothermal 964.9167
## 107 geothermal 963.4167
## 108 geothermal 961.3333
## 109 geothermal 958.4167
## 110 geothermal 955.3333
## 111 geothermal 956.5000
## 112 geothermal 956.4167
## 113 geothermal 956.6667
## 114 geothermal 956.1667
## 115 geothermal 960.6667
## 116 geothermal 963.1667
## 117 geothermal 963.9167
## 118 geothermal 964.5000
## 119 geothermal 968.1667
## 120 geothermal 971.0000
## 121 geothermal 970.2500
## 122 geothermal 969.3333
## 123 geothermal 969.9167
## 124 geothermal 972.5000
## 125 geothermal 973.0000
## 126 geothermal 972.5000
## 127 geothermal 973.3333
## 128 geothermal 975.0833
## 129 geothermal 974.2500
## 130 geothermal 970.8333
## 131 geothermal 968.0833
## 132 geothermal 965.3333
## 133 geothermal 964.2500
## 134 geothermal 961.7500
## 135 geothermal 961.6667
## 136 geothermal 961.0833
## 137 geothermal 959.6667
## 138 geothermal 959.5000
## 139 geothermal 960.5000
## 140 geothermal 963.6667
## 141 geothermal 965.7500
## 142 geothermal 966.7500
## 143 geothermal 967.8333
## 144 geothermal 970.3333
## 145 geothermal 971.9167
## 146 geothermal 974.1667
## 147 geothermal 977.1667
## 148 geothermal 977.1667
## 149 geothermal 977.8333
## 150 geothermal 977.5833
## 151 geothermal 977.9167
## 152 geothermal 978.5833
## 153 geothermal 975.8333
## 154 geothermal 970.8333
## 155 geothermal 967.3333
## 156 geothermal 965.2500
## 157 geothermal 965.0833
## 158 geothermal 964.2500
## 159 geothermal 968.8333
## 160 geothermal 969.0000
## 161 geothermal 970.7500
## 162 geothermal 974.6667
## 163 geothermal 975.9167
## 164 geothermal 978.5000
## 165 geothermal 979.7500
## 166 geothermal 980.5833
## 167 geothermal 980.5833
## 168 geothermal 980.3333
%>%
long_merged_energy %>%
filter(source == "geothermal") %>%
select(-datetime) %>%
mutate(log_usage = log(usage))
## source usage log_usage
## 1 geothermal 967.2500 6.874457
## 2 geothermal 970.3333 6.877640
## 3 geothermal 973.1667 6.880555
## 4 geothermal 973.8333 6.881240
## 5 geothermal 974.6667 6.882096
## 6 geothermal 973.3333 6.880727
## 7 geothermal 972.3333 6.879699
## 8 geothermal 972.0833 6.879442
## 9 geothermal 968.5000 6.875748
## 10 geothermal 963.6667 6.870745
## 11 geothermal 959.0833 6.865978
## 12 geothermal 954.4167 6.861100
## 13 geothermal 944.5833 6.850744
## 14 geothermal 944.0833 6.850214
## 15 geothermal 943.9167 6.850038
## 16 geothermal 950.6667 6.857163
## 17 geothermal 955.9167 6.862671
## 18 geothermal 953.0833 6.859702
## 19 geothermal 950.9167 6.857426
## 20 geothermal 950.7500 6.857251
## 21 geothermal 951.0000 6.857514
## 22 geothermal 956.2500 6.863019
## 23 geothermal 959.8333 6.866760
## 24 geothermal 962.9167 6.869967
## 25 geothermal 966.0833 6.873250
## 26 geothermal 966.3333 6.873509
## 27 geothermal 967.0833 6.874285
## 28 geothermal 966.9167 6.874112
## 29 geothermal 967.4167 6.874629
## 30 geothermal 967.7500 6.874974
## 31 geothermal 966.1667 6.873336
## 32 geothermal 964.5000 6.871610
## 33 geothermal 960.0833 6.867020
## 34 geothermal 954.9167 6.861624
## 35 geothermal 953.8333 6.860489
## 36 geothermal 951.2500 6.857777
## 37 geothermal 948.6667 6.855057
## 38 geothermal 947.4167 6.853739
## 39 geothermal 951.6667 6.858215
## 40 geothermal 952.5833 6.859178
## 41 geothermal 954.0833 6.860751
## 42 geothermal 954.0000 6.860664
## 43 geothermal 956.7500 6.863542
## 44 geothermal 958.8333 6.865717
## 45 geothermal 962.6667 6.869707
## 46 geothermal 965.9167 6.873078
## 47 geothermal 969.0833 6.876351
## 48 geothermal 970.8333 6.878155
## 49 geothermal 970.6667 6.877983
## 50 geothermal 969.9167 6.877210
## 51 geothermal 970.9167 6.878241
## 52 geothermal 973.2500 6.880641
## 53 geothermal 975.4167 6.882865
## 54 geothermal 976.9167 6.884401
## 55 geothermal 976.0833 6.883548
## 56 geothermal 976.5833 6.884060
## 57 geothermal 973.2500 6.880641
## 58 geothermal 968.6667 6.875921
## 59 geothermal 964.6667 6.871783
## 60 geothermal 961.5000 6.868495
## 61 geothermal 960.0000 6.866933
## 62 geothermal 956.4167 6.863194
## 63 geothermal 955.1667 6.861886
## 64 geothermal 957.6667 6.864500
## 65 geothermal 957.5000 6.864326
## 66 geothermal 953.7500 6.860402
## 67 geothermal 951.6667 6.858215
## 68 geothermal 951.6667 6.858215
## 69 geothermal 954.0000 6.860664
## 70 geothermal 959.6667 6.866586
## 71 geothermal 965.9167 6.873078
## 72 geothermal 968.4167 6.875662
## 73 geothermal 969.1667 6.876437
## 74 geothermal 970.4167 6.877726
## 75 geothermal 972.5833 6.879956
## 76 geothermal 973.3333 6.880727
## 77 geothermal 974.2500 6.881668
## 78 geothermal 976.5000 6.883975
## 79 geothermal 976.5833 6.884060
## 80 geothermal 975.7500 6.883206
## 81 geothermal 971.9167 6.879270
## 82 geothermal 968.0833 6.875318
## 83 geothermal 965.5000 6.872646
## 84 geothermal 962.3333 6.869361
## 85 geothermal 958.0833 6.864935
## 86 geothermal 955.6667 6.862409
## 87 geothermal 952.4167 6.859003
## 88 geothermal 950.6667 6.857163
## 89 geothermal 953.0833 6.859702
## 90 geothermal 954.3333 6.861013
## 91 geothermal 957.5833 6.864413
## 92 geothermal 960.8333 6.867801
## 93 geothermal 958.3333 6.865196
## 94 geothermal 959.9167 6.866846
## 95 geothermal 961.9167 6.868928
## 96 geothermal 964.5833 6.871696
## 97 geothermal 963.7500 6.870832
## 98 geothermal 965.1667 6.872301
## 99 geothermal 964.7500 6.871869
## 100 geothermal 964.7500 6.871869
## 101 geothermal 969.5000 6.876780
## 102 geothermal 970.6667 6.877983
## 103 geothermal 971.7500 6.879099
## 104 geothermal 972.0833 6.879442
## 105 geothermal 969.1667 6.876437
## 106 geothermal 964.9167 6.872042
## 107 geothermal 963.4167 6.870486
## 108 geothermal 961.3333 6.868321
## 109 geothermal 958.4167 6.865283
## 110 geothermal 955.3333 6.862060
## 111 geothermal 956.5000 6.863281
## 112 geothermal 956.4167 6.863194
## 113 geothermal 956.6667 6.863455
## 114 geothermal 956.1667 6.862932
## 115 geothermal 960.6667 6.867627
## 116 geothermal 963.1667 6.870226
## 117 geothermal 963.9167 6.871005
## 118 geothermal 964.5000 6.871610
## 119 geothermal 968.1667 6.875404
## 120 geothermal 971.0000 6.878326
## 121 geothermal 970.2500 6.877554
## 122 geothermal 969.3333 6.876609
## 123 geothermal 969.9167 6.877210
## 124 geothermal 972.5000 6.879870
## 125 geothermal 973.0000 6.880384
## 126 geothermal 972.5000 6.879870
## 127 geothermal 973.3333 6.880727
## 128 geothermal 975.0833 6.882523
## 129 geothermal 974.2500 6.881668
## 130 geothermal 970.8333 6.878155
## 131 geothermal 968.0833 6.875318
## 132 geothermal 965.3333 6.872473
## 133 geothermal 964.2500 6.871351
## 134 geothermal 961.7500 6.868755
## 135 geothermal 961.6667 6.868668
## 136 geothermal 961.0833 6.868061
## 137 geothermal 959.6667 6.866586
## 138 geothermal 959.5000 6.866412
## 139 geothermal 960.5000 6.867454
## 140 geothermal 963.6667 6.870745
## 141 geothermal 965.7500 6.872905
## 142 geothermal 966.7500 6.873940
## 143 geothermal 967.8333 6.875060
## 144 geothermal 970.3333 6.877640
## 145 geothermal 971.9167 6.879270
## 146 geothermal 974.1667 6.881582
## 147 geothermal 977.1667 6.884657
## 148 geothermal 977.1667 6.884657
## 149 geothermal 977.8333 6.885339
## 150 geothermal 977.5833 6.885084
## 151 geothermal 977.9167 6.885424
## 152 geothermal 978.5833 6.886106
## 153 geothermal 975.8333 6.883292
## 154 geothermal 970.8333 6.878155
## 155 geothermal 967.3333 6.874543
## 156 geothermal 965.2500 6.872387
## 157 geothermal 965.0833 6.872214
## 158 geothermal 964.2500 6.871351
## 159 geothermal 968.8333 6.876093
## 160 geothermal 969.0000 6.876265
## 161 geothermal 970.7500 6.878069
## 162 geothermal 974.6667 6.882096
## 163 geothermal 975.9167 6.883377
## 164 geothermal 978.5000 6.886021
## 165 geothermal 979.7500 6.887297
## 166 geothermal 980.5833 6.888148
## 167 geothermal 980.5833 6.888148
## 168 geothermal 980.3333 6.887893
%>%
long_merged_energy %>%
filter(source == "geothermal") %>%
select(-datetime) %>%
mutate(log_usage = log(usage)) %>%
summarize(mean_log_usage = mean(log_usage, na.rm = T))
## mean_log_usage
## 1 6.872053
%>%
.
merged_energy %>%
select(-datetime) %>%
mutate(total_usage = rowSums(., na.rm = T)) %>%
summarize(total_usage = sum(total_usage, na.rm = T))
## total_usage
## 1 5223392
%>%
.
merged_energy %>%
select(-datetime) %>%
mutate(total_usage = rowSums(merged_energy, na.rm = T)) %>%
summarize(total_usage = sum(total_usage, na.rm = T))
## Error in rowSums(merged_energy, na.rm = T): 'x' must be numeric
merged_energy
do the following with pipes:
total_hydro
that is the sum of the retained hydro variablestotal_hydro
merged_energy %>%
select(contains("hydro")) %>%
mutate(total_hydro = rowSums(., na.rm = T)) %>%
summarize(mean_hydro = mean(total_hydro, na.rm = T))
## mean_hydro
## 1 3387.702
group_by
and summarize
group_by
is a powerful function that allows us to perform operations by groups of observations!
## # A tibble: 12 x 2
## source sum_usage
## <fct> <dbl>
## 1 biogas 39427.
## 2 biomass 62890.
## 3 coal 1528.
## 4 geothermal 162114.
## 5 large_hydro 498668.
## 6 natural_gas 1786555.
## 7 nuclear 378618.
## 8 other 0
## 9 small_hydro 70466.
## 10 solar 660315.
## 11 wind 440125.
## 12 imports 1122687.
group_by
versus for
loopsfor
loop we wrote yesterday to find the mean life expectancy by year?dplyr
group_by
versus for
loopsgapminder <- read.csv(here::here("data/gapminder5.csv"))
gapminder %>%
group_by(year) %>%
summarize(mean_le = mean(lifeExp, na.rm = T),
sd_lf = sd(lifeExp, na.rm = T))
## # A tibble: 12 x 3
## year mean_le sd_lf
## <int> <dbl> <dbl>
## 1 1952 49.1 12.2
## 2 1957 51.5 12.2
## 3 1962 53.6 12.1
## 4 1967 55.7 11.7
## 5 1972 57.6 11.4
## 6 1977 59.6 11.2
## 7 1982 61.5 10.8
## 8 1987 63.2 10.6
## 9 1992 64.2 11.2
## 10 1997 65.0 11.6
## 11 2002 65.7 12.3
## 12 2007 67.0 12.1
dplyr
to find the mean usage for small hydro, large hydro, biogas, and biomassmerged_energy %>%
select(datetime, contains("hydro"), contains("bio")) %>%
melt(id.vars = "datetime",
variable.name = "source",
value.name = "usage") %>%
group_by(source) %>%
summarize(mean_usage = mean(usage, na.rm = T))
## # A tibble: 4 x 2
## source mean_usage
## <fct> <dbl>
## 1 large_hydro 2968.
## 2 small_hydro 419.
## 3 biogas 235.
## 4 biomass 374.
dplyr
with *_join
functionsdplyr
’s philosophy is to have more functions with fewer options
left_join
- keeps all observations in the first dataframe
merge(all.x = T)
right_join
- keeps all observations in the second dataframe
merge(all.y = T)
full_join
- keeps all observations in both dataframes
merge(all = T)
inner_join
- keeps only those observations that are matched in both datasets
merge(all = F)
merge
and inner_join
## [1] 168 13
## [1] 168 13