diff --git a/.github/workflows/amd.yml b/.github/workflows/amd.yml
index 975339952c12..97b96f2d5875 100644
--- a/.github/workflows/amd.yml
+++ b/.github/workflows/amd.yml
@@ -28,7 +28,6 @@ jobs:
       # Runs a single command using the runners shell
       - name: environment
         run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           rocm-smi --showhw
           which python
           python --version
diff --git a/.github/workflows/nv-accelerate-v100.yml b/.github/workflows/nv-accelerate-v100.yml
index 436e11b3f822..cc1abfc5717d 100644
--- a/.github/workflows/nv-accelerate-v100.yml
+++ b/.github/workflows/nv-accelerate-v100.yml
@@ -24,7 +24,6 @@ jobs:
 
       - name: environment
         run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/.github/workflows/nv-inference.yml b/.github/workflows/nv-inference.yml
index dc00682edd46..681fc90be78f 100644
--- a/.github/workflows/nv-inference.yml
+++ b/.github/workflows/nv-inference.yml
@@ -24,7 +24,6 @@ jobs:
 
       - name: environment
         run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
@@ -41,7 +40,7 @@ jobs:
           git clone https://github.com/huggingface/transformers
           cd transformers
           # if needed switch to the last known good SHA until transformers@master is fixed
-          git checkout v4.21.2
+          # git checkout 1cc453d33
           git rev-parse --short HEAD
           pip uninstall --yes transformers
           pip install .
@@ -62,5 +61,4 @@ jobs:
           if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
           cd tests
           EXPECTED_TORCH=$(pip index versions torch | grep -oP -m1 "^\s*LATEST.*\s\K\d+\.\d+")
-          TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose -m 'seq_inference' unit/ --torch_ver=$EXPECTED_TORCH --cuda_ver="11.3"
           TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 -n 4 --verbose -m 'inference' unit/ --torch_ver=$EXPECTED_TORCH --cuda_ver="11.3"
diff --git a/.github/workflows/nv-lightning-v100.yml b/.github/workflows/nv-lightning-v100.yml
index a63f6b75e769..9fe1043b96d0 100644
--- a/.github/workflows/nv-lightning-v100.yml
+++ b/.github/workflows/nv-lightning-v100.yml
@@ -24,7 +24,6 @@ jobs:
 
       - name: environment
         run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/.github/workflows/nv-nightly.yml b/.github/workflows/nv-nightly.yml
index 40abe198c7cd..98afd75105a3 100644
--- a/.github/workflows/nv-nightly.yml
+++ b/.github/workflows/nv-nightly.yml
@@ -17,7 +17,6 @@ jobs:
 
      - name: environment
        run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/.github/workflows/nv-torch-latest-v100.yml b/.github/workflows/nv-torch-latest-v100.yml
index c8a3c32bdc36..1dc535f9b327 100644
--- a/.github/workflows/nv-torch-latest-v100.yml
+++ b/.github/workflows/nv-torch-latest-v100.yml
@@ -24,7 +24,6 @@ jobs:
 
      - name: environment
        run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/.github/workflows/nv-torch-nightly-v100.yml b/.github/workflows/nv-torch-nightly-v100.yml
index d456483c72f5..e1c916afba2d 100644
--- a/.github/workflows/nv-torch-nightly-v100.yml
+++ b/.github/workflows/nv-torch-nightly-v100.yml
@@ -17,7 +17,6 @@ jobs:
 
      - name: environment
        run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/.github/workflows/nv-torch12-p40.yml b/.github/workflows/nv-torch12-p40.yml
index 29b9f891c3bb..944ba3beb19d 100644
--- a/.github/workflows/nv-torch12-p40.yml
+++ b/.github/workflows/nv-torch12-p40.yml
@@ -24,7 +24,6 @@ jobs:
 
      - name: environment
        run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/.github/workflows/nv-torch18-v100.yml b/.github/workflows/nv-torch18-v100.yml
index 2971416ebd81..b512ea29113f 100644
--- a/.github/workflows/nv-torch18-v100.yml
+++ b/.github/workflows/nv-torch18-v100.yml
@@ -24,7 +24,6 @@ jobs:
 
      - name: environment
        run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/.github/workflows/nv-transformers-v100.yml b/.github/workflows/nv-transformers-v100.yml
index bfc9919be1f9..945457b304c5 100644
--- a/.github/workflows/nv-transformers-v100.yml
+++ b/.github/workflows/nv-transformers-v100.yml
@@ -24,7 +24,6 @@ jobs:
 
      - name: environment
        run: |
-          echo "JobID: $AISC_NODE_INSTANCE_ID"
           nvidia-smi
           which python
           python --version
diff --git a/tests/pytest.ini b/tests/pytest.ini
index b7ee315be801..a52a49e5bbc3 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -1,7 +1,6 @@
 [pytest]
-addopts = -m "not sequential and not nightly and not inference and not seq_inference"
+addopts = -m "not sequential and not nightly and not inference"
 markers =
     sequential:Tests that need to be run sequentially
     inference:Inference model tests
-    seq_inference:Inference model tests to run sequentially
     nightly:Tests that should be run nightly
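Note on the pytest.ini hunk above: `addopts` applies to every pytest invocation, so marker-tagged tests are deselected by default and run only when a workflow opts in explicitly (as nv-inference.yml does with `-m 'inference'`). A minimal sketch of how a marked test interacts with the restored `addopts` line; the test below is hypothetical and not part of this patch:

```python
# Hypothetical example, not part of this patch: with
#   addopts = -m "not sequential and not nightly and not inference"
# a bare `pytest` run deselects this test; CI selects it explicitly
# with `pytest -m 'inference'`.
import pytest


@pytest.mark.inference
def test_marked_inference_example():
    assert True  # placeholder body; real tests live under tests/unit/inference/
```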
diff --git a/tests/unit/inference/test_inference.py b/tests/unit/inference/test_inference.py
index 1b1efdc595fe..cdc3e83232de 100644
--- a/tests/unit/inference/test_inference.py
+++ b/tests/unit/inference/test_inference.py
@@ -52,7 +52,7 @@ def lm_eval_imports():
     "distilgpt2",
     "Norod78/hebrew-bad_wiki-gpt_neo-tiny",
     "EleutherAI/gpt-j-6B",
-    "bigscience/bloom-560m",
+    "bigscience/bloom-350m",
 ]
 _opt_models = [
     "facebook/opt-125m",  # 125m, 1.7B, ..., 175B variants have the same model architecture.
@@ -111,7 +111,6 @@ def enable_cuda_graph(request):
 @pytest.fixture()
 def invalid_model_task_config(model_w_task, dtype, enable_cuda_graph):
     model, task = model_w_task
-    msg = ""
     if pkg_version.parse(torch.__version__) <= pkg_version.parse("1.2"):
         msg = "DS inference injection doesn't work well on older torch versions"
     elif model not in pytest.all_models[task]:
@@ -121,17 +120,10 @@ def invalid_model_task_config(model_w_task, dtype, enable_cuda_graph):
     elif enable_cuda_graph and pkg_version.parse(
             torch.__version__) < pkg_version.parse("1.10"):
         msg = "CUDA Graph is only available in torch versions >= 1.10"
-    elif "gpt-j-6B" in model:
-        if dtype != torch.half:
-            msg = f"Not enough GPU memory to run {model} with dtype {dtype}"
-        elif enable_cuda_graph:
-            msg = f"Not enough GPU memory to run {model} with CUDA Graph enabled"
-    elif "gpt-neox-20b" in model:  # TODO: remove this when neox issues resolved
-        msg = "Skipping gpt-neox-20b for now"
-    elif ("gpt-neox-20b" in model) and (dtype != torch.half):
+    elif ("gpt-j-6B" in model) and (dtype == torch.float):
         msg = f"Not enough GPU memory to run {model} with dtype {dtype}"
-    elif ("bloom" in model) and (dtype != torch.half):
-        msg = f"Bloom models only support half precision, cannot use dtype {dtype}"
+    else:
+        msg = ""
 
     return msg
 
@@ -168,7 +160,7 @@ def query(model_w_task):
 def inf_kwargs(model_w_task):
     model, task = model_w_task
     if task == "text-generation":
-        return {"do_sample": False, "max_length": 20}
+        return {"do_sample": False}
     else:
         return {}
 
@@ -236,9 +228,7 @@ def test(
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
 
         if "gpt-j-6B" in model and dtype == torch.half:
-            _model = AutoModelForCausalLM.from_pretrained(model,
-                                                          revision="float16",
-                                                          torch_dtype=torch.float16)
+            _model = AutoModelForCausalLM.from_pretrained(model)
             tokenizer = AutoTokenizer.from_pretrained(model)
             _model.half()
             pipe = pipeline(
@@ -279,9 +269,7 @@ def test(
         torch.cuda.synchronize()
         ds_time = time.time() - start
 
-        # facebook/opt* and some bigscient/bloom* models are not matching
-        # baseline exactly, adding an exception to them for now
-        if ("opt" in model) or ("bloom" in model):
+        if task == "text-generation":
             bs_output = pipe(query, **inf_kwargs)
 
         # These performance tests are only measuring the time for a single
@@ -290,58 +278,6 @@ def test(
         assert assert_fn(bs_output, ds_output)
 
 
-@pytest.mark.seq_inference
-@pytest.mark.parametrize("model_w_task",
-                         [("gpt2",
-                           "text-generation"),
-                          ("EleutherAI/gpt-neox-20b",
-                           "text-generation"),
-                          ("bigscience/bloom-3b",
-                           "text-generation")],
-                         ids=["gpt2",
-                              "gpt-neox",
-                              "bloom"])
-class TestMPSize(DistributedTest):
-    world_size = 4
-
-    def test(
-        self,
-        model_w_task,
-        dtype,
-        enable_cuda_graph,
-        query,
-        inf_kwargs,
-        assert_fn,
-        invalid_model_task_config,
-    ):
-        if invalid_model_task_config:
-            pytest.skip(invalid_model_task_config)
-
-        model, task = model_w_task
-        local_rank = int(os.getenv("LOCAL_RANK", "0"))
-
-        # We have to load these large models on CPU with pipeline because not
-        # enough GPU memory
-        pipe = pipeline(task, model=model, device=-1, framework="pt")
-        bs_output = pipe(query, **inf_kwargs)
-
-        pipe.model = deepspeed.init_inference(
-            pipe.model,
-            mp_size=self.world_size,
-            dtype=dtype,
-            replace_method="auto",
-            replace_with_kernel_inject=True,
-            enable_cuda_graph=enable_cuda_graph,
-        )
-        # Switch device to GPU so that input tensors are not on CPU
-        pipe.device = torch.device(f"cuda:{local_rank}")
-        ds_output = pipe(query, **inf_kwargs)
-
-        print(local_rank, "baseline", bs_output)
-        print(local_rank, "deepspeed", ds_output)
-        assert assert_fn(bs_output, ds_output)
-
-
 @pytest.mark.nightly
 @pytest.mark.parametrize(
     "model_family, model_name",
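For reference, the `TestMPSize` class removed above exercised DeepSpeed's tensor-parallel kernel injection on models too large to load on a single GPU. Below is a standalone sketch of that pattern, reconstructed from the deleted test; the 4-GPU world size and the `gpt2` stand-in model are assumptions, and the script would be launched with something like `deepspeed --num_gpus 4 example.py`:

```python
# Sketch reconstructed from the removed TestMPSize test; the model name and
# world size are illustrative assumptions, not prescriptive.
import os

import torch
import deepspeed
from transformers import pipeline

local_rank = int(os.getenv("LOCAL_RANK", "0"))

# Load on CPU first (device=-1) so the full-precision weights need not fit
# on a single GPU.
pipe = pipeline("text-generation", model="gpt2", device=-1, framework="pt")
baseline = pipe("DeepSpeed is", do_sample=False)

# Shard the model across ranks and swap in DeepSpeed inference kernels.
pipe.model = deepspeed.init_inference(
    pipe.model,
    mp_size=4,  # assumed world size; match the number of launched ranks
    dtype=torch.half,
    replace_method="auto",
    replace_with_kernel_inject=True,
)
# Point the pipeline at this rank's GPU so input tensors are not on CPU.
pipe.device = torch.device(f"cuda:{local_rank}")
ds_output = pipe("DeepSpeed is", do_sample=False)

print(local_rank, "baseline", baseline)
print(local_rank, "deepspeed", ds_output)
```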