Specifying multiple column names inside mutate

Specifying multiple column names inside mutate

Problem Description:

How do I specify column names inside mutate when multiple columns are generated?

In this example:

# Reproducible example: six columns, each a random permutation of 1..10,
# drawn in the order x2, x3, x1, y3, y2, y1 after fixing the seed.
set.seed(5)
data.frame(
  x2 = sample.int(10L), x3 = sample.int(10L), x1 = sample.int(10L),
  y3 = sample.int(10L), y2 = sample.int(10L), y1 = sample.int(10L)
) |>
  # Pairwise differences written out by hand.
  mutate(z1 = x1 - y1, z2 = x2 - y2, z3 = x3 - y3) |>
  # The same differences computed in one step. The result is stored under the
  # single name `zz` and shows up as zz.x1, zz.x2, zz.x3 when printed.
  mutate(zz = across(num_range("x", 1:3)) - across(num_range("y", 1:3)))

Resulting in:

   x2 x3 x1 y3 y2 y1 z1 z2 z3 zz.x1 zz.x2 zz.x3
1   2  3  9 10  9  6  3 -7 -7     3    -7    -7
2   9 10  6  6  4  5  1  5  4     1     5     4
3   7  6  4  8  8  3  1 -1 -2     1    -1    -2
4   3  2  3  4 10  8 -5 -7 -2    -5    -7    -2
5   1  5  2  5  7  7 -5 -6  0    -5    -6     0
6   6  4  5  3  6  2  3  0  1     3     0     1
7   5  8 10  2  1  4  6  4  6     6     4     6
8  10  7  8  7  3  1  7  7  0     7     7     0
9   4  1  1  9  2  9 -8  2 -8    -8     2    -8
10  8  9  7  1  5 10 -3  3  8    -3     3     8

I want zz.x1 to be named zz1, zz.x2 to be named zz2, and so on.

Solution – 1

I don’t know how to do this with dplyr, but in base R it is pretty straightforward. This might also partly answer your previous question.

# Build zz<i> = x<i> - y<i> for every suffix <i>.

# Hard-coded variable suffixes ...
suff <- 1:3
# ... OR suffixes extracted from the data. Only suffixes that occur on both an
# x column and a y column are kept, so a column without a matching partner
# (for example an "id" column) cannot make the loop below fail.
suff <- intersect(
  sub("^x", "", grep("^x[0-9]+$", names(df), value = TRUE)),
  sub("^y", "", grep("^y[0-9]+$", names(df), value = TRUE))
)
# Order numerically rather than alphabetically, so "10" comes after "2".
suff <- suff[order(as.integer(suff))]

# Add one difference column per suffix; `[[` is used because the column names
# are built at run time.
for (i in suff) {
  df[[paste0("zz", i)]] <- df[[paste0("x", i)]] - df[[paste0("y", i)]]
}

df
#    x2 x3 x1 y3 y2 y1 zz1 zz2 zz3
# 1   2  3  9 10  9  6   3  -7  -7
# 2   9 10  6  6  4  5   1   5   4
# 3   7  6  4  8  8  3   1  -1  -2
# 4   3  2  3  4 10  8  -5  -7  -2
# 5   1  5  2  5  7  7  -5  -6   0
# 6   6  4  5  3  6  2   3   0   1
# 7   5  8 10  2  1  4   6   4   6
# 8  10  7  8  7  3  1   7   7   0
# 9   4  1  1  9  2  9  -8   2  -8
# 10  8  9  7  1  5 10  -3   3   8

A more efficient way which avoids the loop over suffixes would be like this:

# Subtract all y columns from their x partners in a single data-frame
# operation, rename the result to zz<suffix>, and append it to df.
zz <- setNames(
  df[paste0("x", suff)] - df[paste0("y", suff)],
  paste0("zz", suff)
)
df <- cbind(df, zz)

Data:

# Example data: six columns, each an independent shuffle of 1..10.
# The fixed seed and the draw order (x2, x3, x1, y3, y2, y1) determine the
# values.
set.seed(5)
df <- replicate(6, sample(1:10, 10), simplify = FALSE) |>
  setNames(c("x2", "x3", "x1", "y3", "y2", "y1")) |>
  as.data.frame()

Solution – 2

Here’s a dplyr-way:

mutate() takes the name(s) of the new columns from the first across() call, and we can change them with the .names argument. It accepts a glue-style specification, which we can adapt to your needs using str_replace().

library(dplyr)
library(stringr)

# Subtract each y<i> column from its x<i> partner and name the result z<i>.
# The output names come from the first across(); `.names` is a glue
# specification in which `{.col}` stands for the selected column name, so
# str_replace() turns "x1" into "z1", "x2" into "z2", and so on.
df |>
  mutate(across(num_range(prefix = "x", range = 1:3),
                .names = "{str_replace(.col, 'x', 'z')}")
         - across(num_range(prefix = "y", range = 1:3)))

Output:

   x2 x3 x1 y3 y2 y1 z1 z2 z3
1   2  3  9 10  9  6  3 -7 -7
2   9 10  6  6  4  5  1  5  4
3   7  6  4  8  8  3  1 -1 -2
4   3  2  3  4 10  8 -5 -7 -2
5   1  5  2  5  7  7 -5 -6  0
6   6  4  5  3  6  2  3  0  1
7   5  8 10  2  1  4  6  4  6
8  10  7  8  7  3  1  7  7  0
9   4  1  1  9  2  9 -8  2 -8
10  8  9  7  1  5 10 -3  3  8

Data:

# Example data: six columns, each a random permutation of 1..10.
# lapply() walks the named vector in order, so the columns are drawn as
# x2, x3, x1, y3, y2, y1 after the seed is fixed.
set.seed(5)
df <- do.call(
  data.frame,
  lapply(
    c(x2 = 10L, x3 = 10L, x1 = 10L, y3 = 10L, y2 = 10L, y1 = 10L),
    sample.int
  )
)

Update: Or similar to OP’s desired output

# Same approach, but naming the new columns zz<i>. `{.col}` is the selected
# column name inside the `.names` glue specification; str_replace() swaps
# its "x" prefix for "zz".
df2 |>
  mutate(across(num_range(prefix = "x", range = 1:3),
                .names = "{str_replace(.col, 'x', 'zz')}")
         - across(num_range(prefix = "y", range = 1:3)))

Output:

   x2 x3 x1 y3 y2 y1 z1 z2 z3 zz1 zz2 zz3
1   2  3  9 10  9  6  3 -7 -7   3  -7  -7
2   9 10  6  6  4  5  1  5  4   1   5   4
3   7  6  4  8  8  3  1 -1 -2   1  -1  -2
4   3  2  3  4 10  8 -5 -7 -2  -5  -7  -2
5   1  5  2  5  7  7 -5 -6  0  -5  -6   0
6   6  4  5  3  6  2  3  0  1   3   0   1
7   5  8 10  2  1  4  6  4  6   6   4   6
8  10  7  8  7  3  1  7  7  0   7   7   0
9   4  1  1  9  2  9 -8  2 -8  -8   2  -8
10  8  9  7  1  5 10 -3  3  8  -3   3   8

Data

# Example data including the hand-written differences z1..z3.
# Six shuffles of 1..10 are drawn in the order x2, x3, x1, y3, y2, y1 after
# fixing the seed; mutate() then appends the three x - y columns.
set.seed(5)
df2 <- replicate(6, sample(1:10, 10), simplify = FALSE) |>
  setNames(c("x2", "x3", "x1", "y3", "y2", "y1")) |>
  as.data.frame() |>
  mutate(z1 = x1 - y1, z2 = x2 - y2, z3 = x3 - y3)
Rate this post
We use cookies in order to give you the best possible experience on our website. By continuing to use this site, you agree to our use of cookies.
Accept
Reject