x_dt <- data.table::as.data.table(x)
y_dt <- data.table::as.data.table(y)data.table
library(data.table)
dt <- mtcars
data.table::setDT(dt)dplyr
library(dplyr)
library(tidyr)
library(magrittr)
df <- mtcarsbaseR <- aggregate(. ~ cyl, data = df, FUN = length)
baseR <- baseR[c("cyl", "mpg")]  # Adjust column names if needed
print(baseR)  cyl mpg
1   4  11
2   6   7
3   8  14
group by
dt[, .(n=.N), by= .(cyl)]     cyl     n
   <num> <int>
1:     6     7
2:     4    11
3:     8    14
df %>% group_by(cyl) %>% 
  summarize(n=n())# A tibble: 3 × 2
    cyl     n
  <dbl> <int>
1     4    11
2     6     7
3     8    14
dt[, .(avg= mean(mpg)), by= .(cyl)]     cyl      avg
   <num>    <num>
1:     6 19.74286
2:     4 26.66364
3:     8 15.10000
df %>% group_by(cyl) %>% 
  summarize(avg=mean(mpg))# A tibble: 3 × 2
    cyl   avg
  <dbl> <dbl>
1     4  26.7
2     6  19.7
3     8  15.1
dt[, .(avg= mean(mpg)), by= .(cyl,gear)]     cyl  gear    avg
   <num> <num>  <num>
1:     6     4 19.750
2:     4     4 26.925
3:     6     3 19.750
4:     8     3 15.050
5:     4     3 21.500
6:     4     5 28.200
7:     8     5 15.400
8:     6     5 19.700
df %>% group_by(cyl,gear) %>% 
  summarize(avg=mean(mpg))# A tibble: 8 × 3
# Groups:   cyl [3]
    cyl  gear   avg
  <dbl> <dbl> <dbl>
1     4     3  21.5
2     4     4  26.9
3     4     5  28.2
4     6     3  19.8
5     6     4  19.8
6     6     5  19.7
7     8     3  15.0
8     8     5  15.4
pivot
# wide dataframe
wide_dt <- data.frame(player=c('A', 'B', 'C', 'D'),
                 year1=c(12, 15, 19, 19),
                 year2=c(22, 29, 18, 12))
data.table::setDT(wide_dt)
head(wide_dt)   player year1 year2
   <char> <num> <num>
1:      A    12    22
2:      B    15    29
3:      C    19    18
4:      D    19    12
wide_df <- data.frame(player=c('A', 'B', 'C', 'D'),
                 year1=c(12, 15, 19, 19),
                 year2=c(22, 29, 18, 12))
wide_df  player year1 year2
1      A    12    22
2      B    15    29
3      C    19    18
4      D    19    12
# wide dataframe to long dataframe
wide_to_long <- data.table::melt(wide_dt,
                                 measure.vars=c('year1','year2'),
                                 variable.name= 'year',
                                 value.name = 'points')
wide_to_long   player   year points
   <char> <fctr>  <num>
1:      A  year1     12
2:      B  year1     15
3:      C  year1     19
4:      D  year1     19
5:      A  year2     22
6:      B  year2     29
7:      C  year2     18
8:      D  year2     12
long_df <- tidyr::pivot_longer(wide_df,
                    cols=c('year1', 'year2'),
                    names_to='year',
                    values_to='points')
long_df# A tibble: 8 × 3
  player year  points
  <chr>  <chr>  <dbl>
1 A      year1     12
2 A      year2     22
3 B      year1     15
4 B      year2     29
5 C      year1     19
6 C      year2     18
7 D      year1     19
8 D      year2     12
# long dataframe to wide dataframe
long_to_wide <- data.table::dcast(wide_to_long, 
                                  player ~ year,
                                  value.var = 'points')
long_to_wideKey: <player>
   player year1 year2
   <char> <num> <num>
1:      A    12    22
2:      B    15    29
3:      C    19    18
4:      D    19    12
long_to_wide_df <- tidyr::pivot_wider(long_df,
                              names_from = 'year',
                              values_from = 'points')
long_to_wide_df# A tibble: 4 × 3
  player year1 year2
  <chr>  <dbl> <dbl>
1 A         12    22
2 B         15    29
3 C         19    18
4 D         19    12
pivot in base R with reshape()
long to wide
# base R, reshape function
base_wide <- data.frame(player=c('A', 'B', 'C', 'D'),
                 year1=c(12, 15, 19, 19),
                 year2=c(22, 29, 18, 12))
base_wide  player year1 year2
1      A    12    22
2      B    15    29
3      C    19    18
4      D    19    12
# wide to long
long_df <- reshape(
  base_wide,
  direction = "long",
  varying = list(c("year1", "year2")),
  v.names = "points",
  idvar = "player",
  times = c("year1", "year2"),
  timevar = "year"
)
long_df        player  year points
A.year1      A year1     12
B.year1      B year1     15
C.year1      C year1     19
D.year1      D year1     19
A.year2      A year2     22
B.year2      B year2     29
C.year2      C year2     18
D.year2      D year2     12
long to wide
# long data
head(wide_to_long)   player   year points
   <char> <fctr>  <num>
1:      A  year1     12
2:      B  year1     15
3:      C  year1     19
4:      D  year1     19
5:      A  year2     22
6:      B  year2     29
# from long to wide
reshape(wide_to_long, direction = 'wide',idvar = 'player',
        timevar = 'year', v.names = 'points', sep = '_')   player points_year1 points_year2
   <char>        <num>        <num>
1:      A           12           22
2:      B           15           29
3:      C           19           18
4:      D           19           12
Join
x <- data.table(Id  = c("A", "B", "C", "C"),
                X1  = c(1L, 3L, 5L, 7L),
                XY  = c("x2", "x4", "x6", "x8"),
                key = "Id")
xKey: <Id>
       Id    X1     XY
   <char> <int> <char>
1:      A     1     x2
2:      B     3     x4
3:      C     5     x6
4:      C     7     x8
y <- data.table(Id  = c("A", "B", "B", "D"),
                Y1  = c(1L, 3L, 5L, 7L),
                XY  = c("y1", "y3", "y5", "y7"),
                key = "Id")
yKey: <Id>
       Id    Y1     XY
   <char> <int> <char>
1:      A     1     y1
2:      B     3     y3
3:      B     5     y5
4:      D     7     y7
y[x, on = "Id"]Key: <Id>
       Id    Y1     XY    X1   i.XY
   <char> <int> <char> <int> <char>
1:      A     1     y1     1     x2
2:      B     3     y3     3     x4
3:      B     5     y5     3     x4
4:      C    NA   <NA>     5     x6
5:      C    NA   <NA>     7     x8
# y[x, on = .(Id_x=Id_y)] # if different column name
# merge(x, y, all.x = TRUE, by = "Id")
# merge(x, y, all.x = TRUE, by.x = "x_column", by.y = "y_column") 
# if column name differentleft_join(x, y, by = "Id")Key: <Id>
       Id    X1   XY.x    Y1   XY.y
   <char> <int> <char> <int> <char>
1:      A     1     x2     1     y1
2:      B     3     x4     3     y3
3:      B     3     x4     5     y5
4:      C     5     x6    NA   <NA>
5:      C     7     x8    NA   <NA>
# left_join(x, y, by.x = "x_column", by.y= "y_column") # if column
#name differentx[y, on = "Id"]Key: <Id>
       Id    X1     XY    Y1   i.XY
   <char> <int> <char> <int> <char>
1:      A     1     x2     1     y1
2:      B     3     x4     3     y3
3:      B     3     x4     5     y5
4:      D    NA   <NA>     7     y7
# merge(x, y, all.y = TRUE, by = "Id")right_join(x, y, by = "Id")Key: <Id>
       Id    X1   XY.x    Y1   XY.y
   <char> <int> <char> <int> <char>
1:      A     1     x2     1     y1
2:      B     3     x4     3     y3
3:      B     3     x4     5     y5
4:      D    NA   <NA>     7     y7
matching rows from both x and y
x[y, on = "Id", nomatch = 0]Key: <Id>
       Id    X1     XY    Y1   i.XY
   <char> <int> <char> <int> <char>
1:      A     1     x2     1     y1
2:      B     3     x4     3     y3
3:      B     3     x4     5     y5
# merge(x, y)inner_join(x, y, by = "Id")Key: <Id>
       Id    X1   XY.x    Y1   XY.y
   <char> <int> <char> <int> <char>
1:      A     1     x2     1     y1
2:      B     3     x4     3     y3
3:      B     3     x4     5     y5
all the rows from x and y
merge(x, y, all = TRUE, by = "Id")Key: <Id>
       Id    X1   XY.x    Y1   XY.y
   <char> <int> <char> <int> <char>
1:      A     1     x2     1     y1
2:      B     3     x4     3     y3
3:      B     3     x4     5     y5
4:      C     5     x6    NA   <NA>
5:      C     7     x8    NA   <NA>
6:      D    NA   <NA>     7     y7
full_join(x, y, by = "Id")Key: <Id>
       Id    X1   XY.x    Y1   XY.y
   <char> <int> <char> <int> <char>
1:      A     1     x2     1     y1
2:      B     3     x4     3     y3
3:      B     3     x4     5     y5
4:      C     5     x6    NA   <NA>
5:      C     7     x8    NA   <NA>
6:      D    NA   <NA>     7     y7
Return rows from x matching y
unique(x[y$Id, on = "Id", nomatch = 0])Key: <Id>
       Id    X1     XY
   <char> <int> <char>
1:      A     1     x2
2:      B     3     x4
semi_join(x, y, by = "Id")Key: <Id>
       Id    X1     XY
   <char> <int> <char>
1:      A     1     x2
2:      B     3     x4
Return rows from x not matching y
x[!y, on = "Id"]Key: <Id>
       Id    X1     XY
   <char> <int> <char>
1:      C     5     x6
2:      C     7     x8
anti_join(x, y, by = "Id")Key: <Id>
       Id    X1     XY
   <char> <int> <char>
1:      C     5     x6
2:      C     7     x8
bind
x <- mtcars[1:5, 1:4]
y <- mtcars[6:10,1:4]
x     mpg   cyl  disp    hp
   <num> <num> <num> <num>
1:  21.0     6   160   110
2:  21.0     6   160   110
3:  22.8     4   108    93
4:  21.4     6   258   110
5:  18.7     8   360   175
y     mpg   cyl  disp    hp
   <num> <num> <num> <num>
1:  18.1     6 225.0   105
2:  14.3     8 360.0   245
3:  24.4     4 146.7    62
4:  22.8     4 140.8    95
5:  19.2     6 167.6   123
data.table::rbindlist(list(x_dt,y_dt))      mpg   cyl  disp    hp
    <num> <num> <num> <num>
 1:  21.0     6 160.0   110
 2:  21.0     6 160.0   110
 3:  22.8     4 108.0    93
 4:  21.4     6 258.0   110
 5:  18.7     8 360.0   175
 6:  18.1     6 225.0   105
 7:  14.3     8 360.0   245
 8:  24.4     4 146.7    62
 9:  22.8     4 140.8    95
10:  19.2     6 167.6   123
# data.table::rbindlist(list(x_dt,y_dt), 
                        # use.names = FALSE,
                          # fill = FALSE,
#                       idcol = 'id')dplyr::bind_rows(x,y)      mpg   cyl  disp    hp
    <num> <num> <num> <num>
 1:  21.0     6 160.0   110
 2:  21.0     6 160.0   110
 3:  22.8     4 108.0    93
 4:  21.4     6 258.0   110
 5:  18.7     8 360.0   175
 6:  18.1     6 225.0   105
 7:  14.3     8 360.0   245
 8:  24.4     4 146.7    62
 9:  22.8     4 140.8    95
10:  19.2     6 167.6   123
library(reticulate)
df_py <- reticulate::r_to_py(df)import pandas as pddf_python = r.df_py
# Print the Python dataframe
print(df_python)     mpg  cyl   disp     hp  drat     wt   qsec   vs   am  gear  carb
0   21.0  6.0  160.0  110.0  3.90  2.620  16.46  0.0  1.0   4.0   4.0
1   21.0  6.0  160.0  110.0  3.90  2.875  17.02  0.0  1.0   4.0   4.0
2   22.8  4.0  108.0   93.0  3.85  2.320  18.61  1.0  1.0   4.0   1.0
3   21.4  6.0  258.0  110.0  3.08  3.215  19.44  1.0  0.0   3.0   1.0
4   18.7  8.0  360.0  175.0  3.15  3.440  17.02  0.0  0.0   3.0   2.0
5   18.1  6.0  225.0  105.0  2.76  3.460  20.22  1.0  0.0   3.0   1.0
6   14.3  8.0  360.0  245.0  3.21  3.570  15.84  0.0  0.0   3.0   4.0
7   24.4  4.0  146.7   62.0  3.69  3.190  20.00  1.0  0.0   4.0   2.0
8   22.8  4.0  140.8   95.0  3.92  3.150  22.90  1.0  0.0   4.0   2.0
9   19.2  6.0  167.6  123.0  3.92  3.440  18.30  1.0  0.0   4.0   4.0
10  17.8  6.0  167.6  123.0  3.92  3.440  18.90  1.0  0.0   4.0   4.0
11  16.4  8.0  275.8  180.0  3.07  4.070  17.40  0.0  0.0   3.0   3.0
12  17.3  8.0  275.8  180.0  3.07  3.730  17.60  0.0  0.0   3.0   3.0
13  15.2  8.0  275.8  180.0  3.07  3.780  18.00  0.0  0.0   3.0   3.0
14  10.4  8.0  472.0  205.0  2.93  5.250  17.98  0.0  0.0   3.0   4.0
15  10.4  8.0  460.0  215.0  3.00  5.424  17.82  0.0  0.0   3.0   4.0
16  14.7  8.0  440.0  230.0  3.23  5.345  17.42  0.0  0.0   3.0   4.0
17  32.4  4.0   78.7   66.0  4.08  2.200  19.47  1.0  1.0   4.0   1.0
18  30.4  4.0   75.7   52.0  4.93  1.615  18.52  1.0  1.0   4.0   2.0
19  33.9  4.0   71.1   65.0  4.22  1.835  19.90  1.0  1.0   4.0   1.0
20  21.5  4.0  120.1   97.0  3.70  2.465  20.01  1.0  0.0   3.0   1.0
21  15.5  8.0  318.0  150.0  2.76  3.520  16.87  0.0  0.0   3.0   2.0
22  15.2  8.0  304.0  150.0  3.15  3.435  17.30  0.0  0.0   3.0   2.0
23  13.3  8.0  350.0  245.0  3.73  3.840  15.41  0.0  0.0   3.0   4.0
24  19.2  8.0  400.0  175.0  3.08  3.845  17.05  0.0  0.0   3.0   2.0
25  27.3  4.0   79.0   66.0  4.08  1.935  18.90  1.0  1.0   4.0   1.0
26  26.0  4.0  120.3   91.0  4.43  2.140  16.70  0.0  1.0   5.0   2.0
27  30.4  4.0   95.1  113.0  3.77  1.513  16.90  1.0  1.0   5.0   2.0
28  15.8  8.0  351.0  264.0  4.22  3.170  14.50  0.0  1.0   5.0   4.0
29  19.7  6.0  145.0  175.0  3.62  2.770  15.50  0.0  1.0   5.0   6.0
30  15.0  8.0  301.0  335.0  3.54  3.570  14.60  0.0  1.0   5.0   8.0
31  21.4  4.0  121.0  109.0  4.11  2.780  18.60  1.0  1.0   4.0   2.0
result_pandas = df_python.groupby('cyl').size().reset_index(name='n')
print(result_pandas)   cyl   n
0  4.0  11
1  6.0   7
2  8.0  14