mapeando um cálculo simples sobre linhas e listas usando dplyr


8

Eu tenho alguns dados que se parecem com o seguinte:

$`2013 Jul`
# A tibble: 3 x 12
      AAPL     AMD      ADI    ABBV        A      APD       AA       CF     NVDA      HOG      WMT     AMZN
     <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
1 -0.00252 0.00385 0.000314 0.00148 0.000231 0.000655 -0.00107 -0.00137 0.000886 0.000806 0.000689 0.000615
2  1       5       2        5       2        3         1        1       4        4        3        3       
3  0.2     0.2     0        0.2     0        0         0.2      0.2     0        0        0        0 

O que estou tentando fazer é quando a linha 2 == 1, multiplica a linha 3 por -1. De tal forma que o resultado esperado seria:

$`2013 Jul`
# A tibble: 3 x 12
      AAPL     AMD      ADI    ABBV        A      APD       AA       CF     NVDA      HOG      WMT     AMZN
     <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
1 -0.00252 0.00385 0.000314 0.00148 0.000231 0.000655 -0.00107 -0.00137 0.000886 0.000806 0.000689 0.000615
2  1       5       2        5       2        3         1        1       4        4        3        3       
3 -0.2     0.2     0        0.2     0        0        -0.2     -0.2     0        0        0        0 

Eu estou tentando fazer isso usando as mapfunções e tidyr.

Dados:

lst1 <- list(`2013 Jul` = structure(list(AAPL = c(-0.00252413896048252, 
1, 0.2), AMD = c(0.00385385230384388, 5, 0.2), ADI = c(0.000313658814841043, 
2, 0), ABBV = c(0.00148473194650269, 5, 0.2), A = c(0.000231372267065186, 
2, 0), APD = c(0.000654593370621786, 3, 0), AA = c(-0.00106999405402468, 
1, 0.2), CF = c(-0.00136811540143579, 1, 0.2), NVDA = c(0.000886435916090005, 
4, 0), HOG = c(0.000806051331850114, 4, 0), WMT = c(0.000689490484865284, 
3, 0), AMZN = c(0.000614708184565435, 3, 0)), row.names = c(NA, 
-3L), class = c("tbl_df", "tbl", "data.frame")), `2013 Aug` = structure(list(
    AAPL = c(0.0000471064768722691, 1, 0.2), AMD = c(0.00297250845145986, 
    5, 0.2), ADI = c(0.00110927645875706, 3, 0), ABBV = c(0.00186505842086247, 
    4, 0), A = c(0.0000542259939665846, 2, 0), APD = c(0.00187188084293685, 
    5, 0.2), AA = c(-0.000794925865044543, 1, 0.2), CF = c(-0.00109320436559941, 
    1, 0.2), NVDA = c(0.00139874295083158, 4, 0), HOG = c(0.000699507074667968, 
    2, 0), WMT = c(0.000964557826996342, 3, 0), AMZN = c(0.00100980845937234, 
    3, 0)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", 
"data.frame")), `2013 Sep` = structure(list(AAPL = c(0.000874550640770086, 
3, 0), AMD = c(0.00212896308150426, 5, 0.2), ADI = c(0.000297401899798995, 
1, 0.2), ABBV = c(0.00126327568847214, 4, 0), A = c(0.00097767693668047, 
3, 0), APD = c(0.00144402630305102, 5, 0.2), AA = c(-0.000734440361937234, 
1, 0.2), CF = c(-0.000254998800234454, 1, 0.2), NVDA = c(0.00127259056829905, 
4, 0), HOG = c(0.00105093597431519, 3, 0), WMT = c(0.00038339075327491, 
2, 0), AMZN = c(0.000479002073488143, 2, 0)), row.names = c(NA, 
-3L), class = c("tbl_df", "tbl", "data.frame")), `2013 Oct` = structure(list(
    AAPL = c(0.000682565466572836, 2, 0), AMD = c(0.00313699867162714, 
    5, 0.2), ADI = c(0.000209923665516306, 1, 0.2), ABBV = c(0.000865756791407934, 
    2, 0), A = c(0.00161631482825611, 4, 0), APD = c(0.00169177940768777, 
    5, 0.2), AA = c(-0.000319519044240903, 1, 0.2), CF = c(0.00096163857613333, 
    3, 0), NVDA = c(0.00158604241362254, 4, 0), HOG = c(0.00151424115101764, 
    3, 0), WMT = c(0.00000265229900199134, 1, 0.2), AMZN = c(0.00124777917896926, 
    3, 0)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", 
"data.frame")), `2013 Nov` = structure(list(AAPL = c(0.00138847413611967, 
4, 0), AMD = c(0.00131189086851618, 3, 0), ADI = c(0.000998905149605624, 
2, 0), ABBV = c(0.00053428429850944, 1, 0.2), A = c(0.0016278252466143, 
4, 0), APD = c(0.00186840190432607, 5, 0.2), AA = c(0.000727945791304539, 
1, 0.2), CF = c(0.00128641077503917, 3, 0), NVDA = c(0.000839077672381489, 
2, 0), HOG = c(0.00128443125529569, 3, 0), WMT = c(-0.00000406995915300601, 
1, 0.2), AMZN = c(0.00279573900270221, 5, 0.2)), row.names = c(NA, 
-3L), class = c("tbl_df", "tbl", "data.frame")), `2013 Dec` = structure(list(
    AAPL = c(0.00176889092052374, 5, 0.2), AMD = c(-0.000742603775364103, 
    1, 0.2), ADI = c(0.00044132637464973, 1, 0.2), ABBV = c(0.00113925715965696, 
    3, 0), A = c(0.00135042334177499, 4, 0), APD = c(0.0012375761024876, 
    3, 0), AA = c(0.00102055404174894, 2, 0), CF = c(0.00128611035428346, 
    3, 0), NVDA = c(0.000674203833347995, 2, 0), HOG = c(0.00164877495332821, 
    4, 0), WMT = c(0.000671450466059644, 1, 0.2), AMZN = c(0.00299158521138261, 
    5, 0.2)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", 
"data.frame")))

1
Desculpas. Os valores na linha 2 podem demorar, 1, 2, 3, 4, 5então quando o valor nessa linha for 1, desejo modificar a linha 3 multiplicando-a por -1.
precisa saber é o seguinte

Respostas:


4

Você pode fazer:

map(lst1, ~ .x %>%
     mutate_all(~ if_else(row_number() == 3 & lag(.) == 1, . * -1, .)))

$`2013 Jul`
# A tibble: 3 x 12
      AAPL     AMD     ADI    ABBV       A     APD       AA       CF    NVDA     HOG
     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>    <dbl>    <dbl>   <dbl>   <dbl>
1 -0.00252 0.00385 3.14e-4 0.00148 2.31e-4 6.55e-4 -0.00107 -0.00137 8.86e-4 8.06e-4
2  1       5       2.00e+0 5       2.00e+0 3.00e+0  1        1       4.00e+0 4.00e+0
3 -0.2     0.2     0.      0.2     0.      0.      -0.2     -0.2     0.      0.     
# … with 2 more variables: WMT <dbl>, AMZN <dbl>

6

Se você não está restrito a usar esses pacotes específicos, uma solução R básica pode ser usar

> lapply(lst1, function(df) do.call(cbind, lapply(df, function(col) {
    col[3] <- ifelse(col[2]==1, -col[3], col[3])
    return (col)
})))
# 
# $`2013 Jul`
#              AAPL         AMD           ADI         ABBV           A
# [1,] 0.0006825655 0.003136999  0.0002099237 0.0008657568 0.001616315
# [2,] 2.0000000000 5.000000000  1.0000000000 2.0000000000 4.000000000
# [3,] 0.0000000000 0.200000000 -0.2000000000 0.0000000000 0.000000000
#              APD           AA           CF        NVDA         HOG
# [1,] 0.001691779 -0.000319519 0.0009616386 0.001586042 0.001514241
# [2,] 5.000000000  1.000000000 3.0000000000 4.000000000 3.000000000
# [3,] 0.200000000 -0.200000000 0.0000000000 0.000000000 0.000000000
#                WMT        AMZN
# [1,]  2.652299e-06 0.001247779
# [2,]  1.000000e+00 3.000000000
# [3,] -2.000000e-01 0.000000000
# ...

que faz um loop em todos os quadros de dados de em lst1, subsequentemente em um loop em todas as colunas em cada quadro de dados e altera a terceira linha de acordo com o valor da segunda linha.


3

Uma opção com case_whenemap

library(purrr)
library(dplyr)
map(lst1, ~ .x %>%  
           mutate_all(~
         case_when(row_number() == 3 & lag(.)== 1 ~ -1 * ., TRUE ~ .)))

2

Você pode usar [<-.data.framedentro mappara substituir os elementos na linha 3 em que a linha 2 é 1 pelo negativo.

lst1 %>% 
  map(~ {i <- c(.x[2,] == 1); .x[3, i] <- -.x[3, i]; .x})
Ao utilizar nosso site, você reconhece que leu e compreendeu nossa Política de Cookies e nossa Política de Privacidade.
Licensed under cc by-sa 3.0 with attribution required.