library(smokingPrevalence)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(withr)
library(utils)
library(readxl)

ONS public smoking prevalence data

Original wide form

The data is released in wide form and the following imports this data as it appears in the spreadsheet form:

get_smoking_wide(clean = FALSE)
#> New names:
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * `` -> ...6
#> * ...
#> # A tibble: 52 × 21
#>    `All persons age… ...2  ...3  ...4  ...5  ...6  ...7  ...8  ...9  ...10 ...11
#>    <chr>             <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <chr> <chr> <chr>
#>  1 <NA>              Men   <NA>  <NA>  <NA>  <NA>  <NA>  NA    Women <NA>  <NA> 
#>  2 <NA>              16-24 25-34 35-49 50-59 60 a… All … NA    16-24 25-34 35-49
#>  3 <NA>              <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  NA    <NA>  <NA>  <NA> 
#>  4 Unweighted 1      <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  NA    <NA>  <NA>  <NA> 
#>  5 1974 2 3          47.3… 55.1… 55.2… 52.7… 44.5  51.3… NA    41.2… 46.8… 49   
#>  6 1976 2 3          42.7… 48.2… 50.5  49.8… 39.7… 46.2… NA    40.2… 42.8… 45.2…
#>  7 1978 2 3          40.1… 48.3… 48    47.7… 38.2… 44.5  NA    38.2… 42.3… 42.7…
#>  8 1980 2 3          38.2… 47.2… 45.3… 46.7… 35.6… 42.3… NA    36.5  44    42.6…
#>  9 1982 2 3          35.7… 40.2… 40.2… 41.6… 32.7… 37.7… NA    35    36.7… 37.7…
#> 10 1984 2 3          35    40.2… 38.6… 39.3… 30.3… 36.2… NA    34.5  35.7… 35.7…
#> # … with 42 more rows, and 10 more variables: ...12 <chr>, ...13 <chr>,
#> #   ...14 <chr>, ...15 <lgl>, ...16 <chr>, ...17 <chr>, ...18 <chr>,
#> #   ...19 <chr>, ...20 <chr>, Percentages <chr>

To tidy the wide data which combines the 2 row headers to 1 and removes blank rows and columns:

get_smoking_wide() 
#> New names:
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * `` -> ...6
#> * ...
#> New names:
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * `` -> ...6
#> * ...
#> # A tibble: 33 × 19
#>    header `Men 16-24` `Men 25-34` `Men 35-49` `Men 50-59` `Men 60 and over`
#>     <dbl>       <dbl>       <dbl>       <dbl>       <dbl>             <dbl>
#>  1   1974        47.4        55.1        55.3        52.8              44.5
#>  2   1976        42.8        48.2        50.5        49.9              39.8
#>  3   1978        40.1        48.4        48          47.7              38.3
#>  4   1980        38.3        47.3        45.4        46.7              35.6
#>  5   1982        35.8        40.2        40.2        41.6              32.8
#>  6   1984        35          40.3        38.6        39.4              30.3
#>  7   1986        36.1        36.9        37.4        35.2              28.5
#>  8   1988        33.2        36.9        37          32.5              25.8
#>  9   1990        33.2        36.3        34.3        27.8              24.1
#> 10   1992        34.8        34.3        31.9        28.2              21  
#> # … with 23 more rows, and 13 more variables: Men All aged 16 and over <dbl>,
#> #   Women 16-24 <dbl>, Women 25-34 <dbl>, Women 35-49 <dbl>, Women 50-59 <dbl>,
#> #   Women 60 and over <dbl>, Women All aged 16 and over <dbl>,
#> #   Allpersons 16-24 <dbl>, Allpersons 25-34 <dbl>, Allpersons 35-49 <dbl>,
#> #   Allpersons 50-59 <dbl>, Allpersons 60 and over <dbl>,
#> #   Allpersons All aged 16 and over <dbl>

#or explicitly

get_smoking_wide(clean = TRUE) 
#> New names:
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * `` -> ...6
#> * ...
#> New names:
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * `` -> ...6
#> * ...
#> # A tibble: 33 × 19
#>    header `Men 16-24` `Men 25-34` `Men 35-49` `Men 50-59` `Men 60 and over`
#>     <dbl>       <dbl>       <dbl>       <dbl>       <dbl>             <dbl>
#>  1   1974        47.4        55.1        55.3        52.8              44.5
#>  2   1976        42.8        48.2        50.5        49.9              39.8
#>  3   1978        40.1        48.4        48          47.7              38.3
#>  4   1980        38.3        47.3        45.4        46.7              35.6
#>  5   1982        35.8        40.2        40.2        41.6              32.8
#>  6   1984        35          40.3        38.6        39.4              30.3
#>  7   1986        36.1        36.9        37.4        35.2              28.5
#>  8   1988        33.2        36.9        37          32.5              25.8
#>  9   1990        33.2        36.3        34.3        27.8              24.1
#> 10   1992        34.8        34.3        31.9        28.2              21  
#> # … with 23 more rows, and 13 more variables: Men All aged 16 and over <dbl>,
#> #   Women 16-24 <dbl>, Women 25-34 <dbl>, Women 35-49 <dbl>, Women 50-59 <dbl>,
#> #   Women 60 and over <dbl>, Women All aged 16 and over <dbl>,
#> #   Allpersons 16-24 <dbl>, Allpersons 25-34 <dbl>, Allpersons 35-49 <dbl>,
#> #   Allpersons 50-59 <dbl>, Allpersons 60 and over <dbl>,
#> #   Allpersons All aged 16 and over <dbl>