Tidy data with layers in column

Example Data

costs <- data.frame(
  Season=c("Spring","Summer","Autumn","Winter"),
  Rent = c(1000,1000, 1050, 1100),
  Lodger_1 = rep("Smith",4),
  Lodger_2 = rep("Taylor",4),
  Lodger_3 = c("Mark","Mark","Twain","Finn"),
  Lodger_4 = c("Twain","Twain","Mark","Sawyer"),
  Heating_1 = c(60, 10, 90, 150),
  Heating_2 = c(50, 0, 70, 120),
  Heating_3 = c(50, 0, 75, 120),
  Heating_4 = c(55, 0, 70, 160),
  Elevator_2 = c(20, 20, 20, 22),
  Elevator_3 = c(25, 25, 25, 28),
  Elevator_4 = c(30, 30, 30, 33),
  
  stringsAsFactors = FALSE
)

knitr::kable(costs)
Season Rent Lodger_1 Lodger_2 Lodger_3 Lodger_4 Heating_1 Heating_2 Heating_3 Heating_4 Elevator_2 Elevator_3 Elevator_4
Spring 1000 Smith Taylor Mark Twain 60 50 50 55 20 25 30
Summer 1000 Smith Taylor Mark Twain 10 0 0 0 20 25 30
Autumn 1050 Smith Taylor Twain Mark 90 70 75 70 20 25 30
Winter 1100 Smith Taylor Finn Sawyer 150 120 120 160 22 28 33

Tidying the data step by step

library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0       v purrr   0.3.0  
## v tibble  2.0.1       v dplyr   0.8.0.1
## v tidyr   0.8.2       v stringr 1.4.0  
## v readr   1.3.1       v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
gathered <- costs %>% 
    gather(key="type_floor",value="val", matches(paste0("_[0-9]+")))

knitr::kable(gathered)
Season Rent type_floor val
Spring 1000 Lodger_1 Smith
Summer 1000 Lodger_1 Smith
Autumn 1050 Lodger_1 Smith
Winter 1100 Lodger_1 Smith
Spring 1000 Lodger_2 Taylor
Summer 1000 Lodger_2 Taylor
Autumn 1050 Lodger_2 Taylor
Winter 1100 Lodger_2 Taylor
Spring 1000 Lodger_3 Mark
Summer 1000 Lodger_3 Mark
Autumn 1050 Lodger_3 Twain
Winter 1100 Lodger_3 Finn
Spring 1000 Lodger_4 Twain
Summer 1000 Lodger_4 Twain
Autumn 1050 Lodger_4 Mark
Winter 1100 Lodger_4 Sawyer
Spring 1000 Heating_1 60
Summer 1000 Heating_1 10
Autumn 1050 Heating_1 90
Winter 1100 Heating_1 150
Spring 1000 Heating_2 50
Summer 1000 Heating_2 0
Autumn 1050 Heating_2 70
Winter 1100 Heating_2 120
Spring 1000 Heating_3 50
Summer 1000 Heating_3 0
Autumn 1050 Heating_3 75
Winter 1100 Heating_3 120
Spring 1000 Heating_4 55
Summer 1000 Heating_4 0
Autumn 1050 Heating_4 70
Winter 1100 Heating_4 160
Spring 1000 Elevator_2 20
Summer 1000 Elevator_2 20
Autumn 1050 Elevator_2 20
Winter 1100 Elevator_2 22
Spring 1000 Elevator_3 25
Summer 1000 Elevator_3 25
Autumn 1050 Elevator_3 25
Winter 1100 Elevator_3 28
Spring 1000 Elevator_4 30
Summer 1000 Elevator_4 30
Autumn 1050 Elevator_4 30
Winter 1100 Elevator_4 33
separated <- gathered %>% 
    separate("type_floor", c("type","Floor"),sep="_", convert=TRUE)

knitr::kable(separated)
Season Rent type Floor val
Spring 1000 Lodger 1 Smith
Summer 1000 Lodger 1 Smith
Autumn 1050 Lodger 1 Smith
Winter 1100 Lodger 1 Smith
Spring 1000 Lodger 2 Taylor
Summer 1000 Lodger 2 Taylor
Autumn 1050 Lodger 2 Taylor
Winter 1100 Lodger 2 Taylor
Spring 1000 Lodger 3 Mark
Summer 1000 Lodger 3 Mark
Autumn 1050 Lodger 3 Twain
Winter 1100 Lodger 3 Finn
Spring 1000 Lodger 4 Twain
Summer 1000 Lodger 4 Twain
Autumn 1050 Lodger 4 Mark
Winter 1100 Lodger 4 Sawyer
Spring 1000 Heating 1 60
Summer 1000 Heating 1 10
Autumn 1050 Heating 1 90
Winter 1100 Heating 1 150
Spring 1000 Heating 2 50
Summer 1000 Heating 2 0
Autumn 1050 Heating 2 70
Winter 1100 Heating 2 120
Spring 1000 Heating 3 50
Summer 1000 Heating 3 0
Autumn 1050 Heating 3 75
Winter 1100 Heating 3 120
Spring 1000 Heating 4 55
Summer 1000 Heating 4 0
Autumn 1050 Heating 4 70
Winter 1100 Heating 4 160
Spring 1000 Elevator 2 20
Summer 1000 Elevator 2 20
Autumn 1050 Elevator 2 20
Winter 1100 Elevator 2 22
Spring 1000 Elevator 3 25
Summer 1000 Elevator 3 25
Autumn 1050 Elevator 3 25
Winter 1100 Elevator 3 28
Spring 1000 Elevator 4 30
Summer 1000 Elevator 4 30
Autumn 1050 Elevator 4 30
Winter 1100 Elevator 4 33
result <- separated %>%
    spread(key="type",val="val", convert=TRUE)
knitr::kable(result)
Season Rent Floor Elevator Heating Lodger
Autumn 1050 1 NA 90 Smith
Autumn 1050 2 20 70 Taylor
Autumn 1050 3 25 75 Twain
Autumn 1050 4 30 70 Mark
Spring 1000 1 NA 60 Smith
Spring 1000 2 20 50 Taylor
Spring 1000 3 25 50 Mark
Spring 1000 4 30 55 Twain
Summer 1000 1 NA 10 Smith
Summer 1000 2 20 0 Taylor
Summer 1000 3 25 0 Mark
Summer 1000 4 30 0 Twain
Winter 1100 1 NA 150 Smith
Winter 1100 2 22 120 Taylor
Winter 1100 3 28 120 Finn
Winter 1100 4 33 160 Sawyer

Making a function

`%>%` <- magrittr::`%>%`
gather_layered <- function(data, sep="_", layername = "layernr")
{
  data %>% 
    tidyr::gather(key="type_nr",value="val", tidyselect::matches(paste0(sep,"[0-9]+"))) %>%
    tidyr::separate("type_nr", c("type",layername),sep=sep, convert=TRUE) %>%
    tidyr::spread(key="type",val="val", convert=TRUE)
}
knitr::kable(gather_layered(costs, sep="_", layername = "Layer"))
Season Rent Layer Elevator Heating Lodger
Autumn 1050 1 NA 90 Smith
Autumn 1050 2 20 70 Taylor
Autumn 1050 3 25 75 Twain
Autumn 1050 4 30 70 Mark
Spring 1000 1 NA 60 Smith
Spring 1000 2 20 50 Taylor
Spring 1000 3 25 50 Mark
Spring 1000 4 30 55 Twain
Summer 1000 1 NA 10 Smith
Summer 1000 2 20 0 Taylor
Summer 1000 3 25 0 Mark
Summer 1000 4 30 0 Twain
Winter 1100 1 NA 150 Smith
Winter 1100 2 22 120 Taylor
Winter 1100 3 28 120 Finn
Winter 1100 4 33 160 Sawyer

Related