diff --git a/rl_games/algos_torch/running_mean_std.py b/rl_games/algos_torch/running_mean_std.py
index 4cdad825..7a6e21df 100644
--- a/rl_games/algos_torch/running_mean_std.py
+++ b/rl_games/algos_torch/running_mean_std.py
@@ -81,7 +81,7 @@ def forward(self, input, denorm:bool=False, mask:Optional[torch.Tensor]=None):
             y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float()
         else:
             if self.norm_only:
-                y = input/ torch.sqrt(current_var.float() + self.epsilon)
+                y = input / torch.sqrt(current_var.float() + self.epsilon)
             else:
                 y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon)
                 y = torch.clamp(y, min=-5.0, max=5.0)
diff --git a/rl_games/configs/atari/ppo_breakout.yaml b/rl_games/configs/atari/ppo_breakout.yaml
index 95ceee8e..97b43674 100644
--- a/rl_games/configs/atari/ppo_breakout.yaml
+++ b/rl_games/configs/atari/ppo_breakout.yaml
@@ -17,7 +17,7 @@ params:
       activation: relu
       initializer:
         name: orthogonal_initializer
-        gain: 1.41421356237 
+        gain: 1.41421356237
       convs:
         - filters: 32
           kernel_size: 8
@@ -31,7 +31,7 @@ params:
           kernel_size: 3
           strides: 1
           padding: 0
-      
+
     mlp:
       units: [512]
       activation: relu
@@ -55,7 +55,7 @@ params:
     learning_rate: 8e-4
     lr_schedule: None
     kl_threshold: 0.01
-    
+
     grad_norm: 1.0
     entropy_coef: 0.01
     truncate_grads: True
diff --git a/rl_games/configs/atari/ppo_pacman_envpool_impala.yaml b/rl_games/configs/atari/ppo_pacman_envpool_impala.yaml
new file mode 100644
index 00000000..77ca4808
--- /dev/null
+++ b/rl_games/configs/atari/ppo_pacman_envpool_impala.yaml
@@ -0,0 +1,79 @@
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  network:
+    name: resnet_actor_critic
+    require_rewards: True
+    require_last_actions: True
+    separate: False
+    value_shape: 1
+    space:
+      discrete:
+
+    cnn:
+      permute_input: False
+      conv_depths: [16, 32, 32]
+      activation: relu
+      initializer:
+        name: default
+      regularizer:
+        name: 'None'
+
+    mlp:
+      units: [512]
+      activation: relu
+      regularizer:
+        name: None
+      initializer:
+        name: default
+    rnn:
+      name: lstm
+      units: 256
+      layers: 1
+
+  config:
+    name: pacman_impala
+    env_name: envpool
+    normalize_advantage: True
+    normalize_input: False
+    normalize_value: False
+    reward_shaper:
+      min_val: -100
+      max_val: 100
+      #scale_value: 0.01
+    gamma: 0.995
+    tau: 0.95
+    learning_rate: 3e-4
+    score_to_win: 100000
+    grad_norm: 1.5
+    entropy_coef: 0.01
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: True
+    num_actors: 64
+    horizon_length: 128
+    minibatch_size: 2048
+    mini_epochs: 2
+    critic_coef: 1
+    lr_schedule: None
+    kl_threshold: 0.01
+    use_diagnostics: True
+    seq_length: 32
+    max_epochs: 200000
+
+    env_config:
+      env_name: MsPacman-v5
+      episodic_life: True
+      has_lives: True
+      use_dict_obs_space: True
+
+    player:
+      render: False
+      games_num: 20
+      n_game_life: 3
+      deterministic: True
+
diff --git a/rl_games/configs/atari/ppo_pacman_torch_rnn.yaml b/rl_games/configs/atari/ppo_pacman_torch_rnn.yaml
index 195e5af9..f8eee1ee 100644
--- a/rl_games/configs/atari/ppo_pacman_torch_rnn.yaml
+++ b/rl_games/configs/atari/ppo_pacman_torch_rnn.yaml
@@ -5,8 +5,6 @@ params:
 
   model:
     name: discrete_a2c
-
-
   network:
     name: actor_critic
     separate: False
@@ -18,7 +16,7 @@ params:
       activation: relu
      initializer:
        name: glorot_normal_initializer
-        gain: 1.4142 
+        gain: 1.4142
      regularizer:
        name: 'None'
      convs:
@@ -34,7 +32,6 @@ params:
           kernel_size: 3
           strides: 1
           padding: 0
-
     mlp:
       units: [512]
       activation: relu
@@ -54,7 +51,7 @@ params:
       #min_val: -1
       #max_val: 1
       scale_value: 1
-      
+
     normalize_advantage: True
     gamma: 0.99
     tau: 0.95
@@ -78,10 +75,12 @@ params:
     normalize_input: False
     normalize_value: True
     max_epochs: 50000
+
     env_config:
       skip: 4
       name: 'MsPacmanNoFrameskip-v4'
       episode_life: True
+
     player:
       render: True
       games_num: 10
diff --git a/rl_games/configs/atari/ppo_pong_envpool_resnet.yaml b/rl_games/configs/atari/ppo_pong_envpool_resnet.yaml
new file mode 100644
index 00000000..945fd8c2
--- /dev/null
+++ b/rl_games/configs/atari/ppo_pong_envpool_resnet.yaml
@@ -0,0 +1,83 @@
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  network:
+    name: e2e_vision_actor_critic
+    separate: False
+    value_shape: 1
+    space:
+      discrete:
+
+    backbone:
+      type: resnet18 # can be efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
+      pretrained: True
+      permute_input: False
+      freeze: False
+      preprocess_image: True
+      args:
+        zero_init_residual: True
+        norm_layer: None
+
+    mlp:
+      units: [512]
+      activation: relu
+      regularizer:
+        name: None
+      initializer:
+        name: default
+    rnn:
+      name: lstm
+      units: 512
+      layers: 1
+      before_mlp: True
+      concat_output: True
+
+  config:
+    name: Pong_resnet18_LSTM_MLP_512_concat_output_2e-4_linear_LR_norm
+    env_name: envpool
+    score_to_win: 20.0
+    mixed_precision: True
+    normalize_input: True
+    normalize_value: True
+    normalize_advantage: True
+    reward_shaper:
+      min_val: -1
+      max_val: 1
+    gamma: 0.99
+    tau: 0.95
+    grad_norm: 1.0
+    entropy_coef: 0.01
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: True
+    save_best_after: 25
+    save_frequency: 200
+    num_actors: 64
+    horizon_length: 128
+    minibatch_size: 2048
+    mini_epochs: 2
+    critic_coef: 1
+    learning_rate: 2e-4
+    lr_schedule: linear
+    kl_threshold: 0.01
+    use_diagnostics: True
+    seq_length: 8
+    max_epochs: 500
+    #weight_decay: 0.001
+
+    env_config:
+      env_name: Pong-v5
+      has_lives: False
+      use_dict_obs_space: False #True
+      stack_num: 1
+      gray_scale: False
+    player:
+      render: True
+      games_num: 10
+      n_game_life: 1
+      deterministic: True
+
diff --git a/rl_games/configs/maniskill/maniskill_ant.yaml b/rl_games/configs/maniskill/maniskill_ant.yaml
new file mode 100644
index 00000000..688e24fc
--- /dev/null
+++ b/rl_games/configs/maniskill/maniskill_ant.yaml
@@ -0,0 +1,65 @@
+params:
+  seed: 5
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [256, 128, 64]
+      activation: elu
+      initializer:
+        name: default
+
+  config:
+    name: AntRun
+    env_name: maniskill
+    normalize_input: True
+    normalize_value: True
+    value_bootstrap: True
+    reward_shaper:
+      scale_value: 1.0
+    normalize_advantage: True
+    gamma: 0.99
+    tau: 0.95
+
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: True
+    use_smooth_clamp: True
+    bound_loss_type: regularisation
+    bounds_loss_coef: 0.0005
+    max_epochs: 1000
+    save_best_after: 25
+    save_frequency: 100
+    num_actors: 4096
+    horizon_length: 16
+    minibatch_size: 32768
+    mini_epochs: 4
+    critic_coef: 2
+
+    env_config:
+      env_name: MS-HumanoidRun-v1
+
+    player:
+      render: True
+      render_sleep: 0.0
\ No newline at end of file
diff --git a/rl_games/configs/maniskill/maniskill_pickcube_impala.yaml b/rl_games/configs/maniskill/maniskill_pickcube_impala.yaml
index 8f3f4786..0b86d9f2 100644
--- a/rl_games/configs/maniskill/maniskill_pickcube_impala.yaml
+++ b/rl_games/configs/maniskill/maniskill_pickcube_impala.yaml
@@ -23,9 +23,9 @@ params:
       continuous:
         mu_activation: None
         sigma_activation: None
-
         mu_init:
           name: default
+          scale: 0.02
         sigma_init:
           name: const_initializer
           val: 0
@@ -35,25 +35,27 @@ params:
       conv_depths: [16, 32, 32]
       activation: relu
       initializer:
-        name: default
+        name: orthogonal_initializer
+        gain: 1.41421356237
       regularizer:
         name: None
     mlp:
-      units: [512, 256]
+      units: [256]
       activation: elu
       regularizer:
        name: None
      initializer:
        name: default
-    # rnn:
-    #   name: lstm
-    #   units: 512
-    #   layers: 1
-    #   before_mlp: True
-    #   concat_output: True
+    rnn:
+      name: lstm
+      layer_norm: True
+      units: 512
+      layers: 1
+      before_mlp: True
+      concat_output: True
 
   config:
-    name: PickCube_RGB_impala
+    name: PickCube_RGB_impala_lstm_init_2e-4_linear_lr
     env_name: maniskill
     reward_shaper:
       scale_value: 1.0
@@ -70,17 +72,17 @@ params:
       scale_value: 1.0
     gamma: 0.99
     tau : 0.95
-    learning_rate: 1e-4
-    lr_schedule: adaptive
+    learning_rate: 2e-4
+    lr_schedule: linear
     kl_threshold: 0.008
-    max_epochs: 10000
+    max_epochs: 20000
     save_best_after: 25
     save_frequency: 500
     grad_norm: 1.0
     entropy_coef: 0.0
     truncate_grads: True
     e_clip: 0.2
-    horizon_length: 16
+    horizon_length: 32
     minibatch_size: 2048
     mini_epochs: 2
     critic_coef: 1
diff --git a/rl_games/configs/maniskill/maniskill_pickcube_impala_lstm.yaml b/rl_games/configs/maniskill/maniskill_pickcube_impala_lstm.yaml
new file mode 100644
index 00000000..a0a71480
--- /dev/null
+++ b/rl_games/configs/maniskill/maniskill_pickcube_impala_lstm.yaml
@@ -0,0 +1,102 @@
+params:
+  seed: 42
+
+  # environment wrapper clipping
+  env:
+    # added to the wrapper
+    clip_observations: 5.0
+    # can make custom wrapper?
+    clip_actions: 1.0
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: vision_actor_critic
+    require_rewards: False
+    require_last_actions: False
+    separate: False
+    value_shape: 1
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    cnn:
+      permute_input: True
+      conv_depths: [16, 32, 32]
+      activation: relu
+      initializer:
+        name: default
+      regularizer:
+        name: None
+    mlp:
+      units: [256]
+      activation: elu
+      regularizer:
+        name: None
+      initializer:
+        name: default
+    rnn:
+      name: lstm
+      layer_norm: True
+      units: 512
+      layers: 1
+      before_mlp: True
+      concat_output: True
+
+  config:
+    name: PickCube_RGB_impala_LSTM_norm
+    env_name: maniskill
+    reward_shaper:
+      scale_value: 1.0
+    device: cuda:0
+    device_name: cuda:0
+    multi_gpu: False
+    ppo: True
+    mixed_precision: True
+    normalize_input: False
+    normalize_value: True
+    normalize_advantage: True
+    num_actors: 256
+    reward_shaper:
+      scale_value: 1.0
+    gamma: 0.99
+    tau : 0.95
+    learning_rate: 2e-4
+    lr_schedule: linear
+    kl_threshold: 0.008
+    max_epochs: 20000
+    save_best_after: 25
+    save_frequency: 500
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    horizon_length: 32
+    minibatch_size: 1024 #2048
+    mini_epochs: 2
+    critic_coef: 1
+    clip_value: True
+    seq_length: 8
+    bounds_loss_coef: 0.0001
+    #weight_decay: 0.001
+
+    env_config:
+      env_name: PickCube-v1
+      obs_mode: rgbd
+      control_mode: pd_ee_delta_pose
+      reward_mode: dense
+
+    player:
+      render: True
+      deterministic: True
+
diff --git a/rl_games/configs/maniskill/maniskill_pickcube_impala_small_lstm.yaml b/rl_games/configs/maniskill/maniskill_pickcube_impala_small_lstm.yaml
new file mode 100644
index 00000000..39ea525a
--- /dev/null
+++ b/rl_games/configs/maniskill/maniskill_pickcube_impala_small_lstm.yaml
@@ -0,0 +1,103 @@
+params:
+  seed: 42
+
+  # environment wrapper clipping
+  env:
+    # added to the wrapper
+    clip_observations: 5.0
+    # can make custom wrapper?
+    clip_actions: 1.0
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: vision_actor_critic
+    require_rewards: False
+    require_last_actions: False
+    separate: False
+    value_shape: 1
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+          scale: 0.02
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    cnn:
+      permute_input: True
+      conv_depths: [8, 16, 16]
+      activation: relu
+      initializer:
+        name: orthogonal_initializer
+        gain: 1.41421356237
+      regularizer:
+        name: None
+    mlp:
+      units: [256]
+      activation: elu
+      regularizer:
+        name: None
+      initializer:
+        name: default
+    rnn:
+      name: lstm
+      layer_norm: True
+      units: 512
+      layers: 1
+      before_mlp: True
+      concat_output: True
+
+  config:
+    name: PickCube_RGB_Impala_Small_LSTM_norm_embedding
+    env_name: maniskill
+    reward_shaper:
+      scale_value: 1.0
+    device: cuda:0
+    device_name: cuda:0
+    multi_gpu: False
+    ppo: True
+    mixed_precision: True
+    normalize_input: False
+    normalize_value: True
+    normalize_advantage: True
+    num_actors: 256
+    reward_shaper:
+      scale_value: 1.0
+    gamma: 0.99
+    tau : 0.95
+    learning_rate: 2e-4
+    lr_schedule: None
+    kl_threshold: 0.008
+    max_epochs: 10000
+    save_best_after: 25
+    save_frequency: 500
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    horizon_length: 32
+    minibatch_size: 2048
+    mini_epochs: 2
+    critic_coef: 1
+    clip_value: True
+    seq_length: 8
+    bounds_loss_coef: 0.0001
+    #weight_decay: 0.001
+
+    env_config:
+      env_name: PickCube-v1
+      obs_mode: rgbd
+      control_mode: pd_ee_delta_pose
+      reward_mode: dense
+
+    player:
+      render: False
+      deterministic: True
+
diff --git a/rl_games/configs/maniskill/maniskill_pickcube_vision.yaml b/rl_games/configs/maniskill/maniskill_pickcube_vision.yaml
index 232d3b7c..0b05ff83 100644
--- a/rl_games/configs/maniskill/maniskill_pickcube_vision.yaml
+++ b/rl_games/configs/maniskill/maniskill_pickcube_vision.yaml
@@ -24,6 +24,7 @@ params:
 
         mu_init:
           name: default
+          scale: 0.02
         sigma_init:
           name: const_initializer
           val: 0
@@ -39,7 +40,7 @@ params:
         zero_init_residual: True
         norm_layer: None
     mlp:
-      units: [512, 256]
+      units: [512]
      activation: elu
      regularizer:
        name: None
@@ -71,16 +72,16 @@ params:
     gamma: 0.99
     tau : 0.95
     learning_rate: 1e-4
-    lr_schedule: adaptive
+    lr_schedule: None
     kl_threshold: 0.008
-    max_epochs: 20000
+    max_epochs: 50000
     save_best_after: 25
     save_frequency: 500
     grad_norm: 1.0
     entropy_coef: 0.0
     truncate_grads: True
     e_clip: 0.2
-    horizon_length: 16
+    horizon_length: 64
     minibatch_size: 512
     mini_epochs: 2
     critic_coef: 1
diff --git a/rl_games/networks/vision_networks.py b/rl_games/networks/vision_networks.py
index 634745d6..15e13de9 100644
--- a/rl_games/networks/vision_networks.py
+++ b/rl_games/networks/vision_networks.py
@@ -1,6 +1,7 @@
 import torch
 from torch import nn
 from torchvision import models
+from rl_games.algos_torch.running_mean_std import RunningMeanStd, RunningMeanStdObs
 import torch.nn.functional as F
 from rl_games.algos_torch import torch_ext
 from rl_games.algos_torch.network_builder import NetworkBuilder, ImpalaSequential
@@ -21,10 +22,13 @@ def __init__(self, params, **kwargs):
             if type(full_input_shape) is dict:
                 input_shape = full_input_shape['camera']
                 proprio_shape = full_input_shape['proprio']
+                proprio_size = proprio_shape[0]
             else:
                 input_shape = full_input_shape
 
+            self.normalize_emb = kwargs.pop('normalize_emb', False)
+
             self.num_seqs = kwargs.pop('num_seqs', 1)
             self.value_size = kwargs.pop('value_size', 1)
@@ -42,6 +46,9 @@ def __init__(self, params, **kwargs):
             else:
                 out_size = self.units[-1]
 
+            self.running_mean_std = torch.jit.script(RunningMeanStd((mlp_input_size,)))
+            self.layer_norm_emb = torch.nn.LayerNorm(mlp_input_size)
+
             if self.has_rnn:
                 if not self.is_rnn_before_mlp:
                     rnn_in_size = out_size
@@ -55,7 +62,7 @@ def __init__(self, params, **kwargs):
 
             mlp_args = {
                 'input_size' : mlp_input_size,
-                'units' :self.units,
+                'units' : self.units,
                 'activation' : self.activation,
                 'norm_func_name' : self.normalization,
                 'dense_func' : torch.nn.Linear
@@ -101,6 +108,14 @@ def __init__(self, params, **kwargs):
 
             mlp_init(self.value.weight)
 
+        def norm_emb(self, embedding):
+            #with torch.no_grad():
+            return self.running_mean_std(embedding) if self.normalize_emb else embedding
+            # if len(self.units) == 0:
+            #     out_size = cnn_output_size
+            # else:
+            #     out_size = self.units[-1]
+
         def forward(self, obs_dict):
             obs = obs_dict['obs']['camera']
             proprio = obs_dict['obs']['proprio']
@@ -117,6 +132,9 @@ def forward(self, obs_dict):
             out = self.flatten_act(out)
             out = torch.cat([out, proprio], dim=1)
 
+            #print('out shape: ', out.shape)
+            #out = self.norm_emb(out)
+            out = self.layer_norm_emb(out)
 
             if self.has_rnn:
                 seq_length = obs_dict.get('seq_length', 1)