diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
index 84e7dea8dec7..cc1b5e22407c 100644
--- a/autogen/agentchat/conversable_agent.py
+++ b/autogen/agentchat/conversable_agent.py
@@ -611,25 +611,26 @@ def generate_code_execution_reply(
         if messages is None:
             messages = self._oai_messages[sender]
         last_n_messages = code_execution_config.pop("last_n_messages", 1)
+
+        # iterate through the last n messages in reverse order
+        # if code blocks are found, execute the code blocks and return the output
+        # if no code blocks are found, continue
         for i in range(min(len(messages), last_n_messages)):
             message = messages[-(i + 1)]
             code_blocks = extract_code(message["content"])
             if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN:
-                # no code block is found, lang should be `UNKNOWN`
-
-                if i == last_n_messages - 1:
-                    code_execution_config["last_n_messages"] = last_n_messages
-                    return False, None
                 continue
-                # code_blocks, _ = find_code(messages, sys_msg=self._oai_system_message, **self.llm_config)
-                # if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN:
-                #     return code_blocks[0][1]
-            # try to execute the code
+
+            # found code blocks, execute code and push "last_n_messages" back
             exitcode, logs = self.execute_code_blocks(code_blocks)
+            code_execution_config["last_n_messages"] = last_n_messages
             exitcode2str = "execution succeeded" if exitcode == 0 else "execution failed"
-            break
+            return True, f"exitcode: {exitcode} ({exitcode2str})\nCode output: {logs}"
+
+        # no code blocks are found, push last_n_messages back and return.
         code_execution_config["last_n_messages"] = last_n_messages
-        return True, f"exitcode: {exitcode} ({exitcode2str})\nCode output: {logs}"
+
+        return False, None
 
     def generate_function_call_reply(
         self,
diff --git a/autogen/code_utils.py b/autogen/code_utils.py
index e5e0a7e3a6e3..e7d4faa8a6db 100644
--- a/autogen/code_utils.py
+++ b/autogen/code_utils.py
@@ -33,7 +33,14 @@ def infer_lang(code):
     """
     if code.startswith("python ") or code.startswith("pip") or code.startswith("python3 "):
         return "sh"
-    return "python"
+
+    # check whether the code is valid Python
+    try:
+        compile(code, "test", "exec")
+        return "python"
+    except SyntaxError:
+        # not valid Python code
+        return UNKNOWN
 
 
 def extract_code(
@@ -258,7 +265,7 @@ def execute_code(
     file_dir = os.path.dirname(filepath)
     os.makedirs(file_dir, exist_ok=True)
     if code is not None:
-        with open(filepath, "w") as fout:
+        with open(filepath, "w", encoding="utf-8") as fout:
             fout.write(code)
     # check if already running in a docker container
     in_docker_container = os.path.exists("/.dockerenv")
diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py
index d7ecc388fe8d..9debb6a3699d 100644
--- a/test/agentchat/test_conversable_agent.py
+++ b/test/agentchat/test_conversable_agent.py
@@ -76,6 +76,48 @@ def test_context():
     # expect hello there to be printed
 
 
+def test_generate_code_execution_reply():  # exercises generate_code_execution_reply in four scenarios
+    agent = ConversableAgent(
+        "a0", max_consecutive_auto_reply=10, code_execution_config=False, llm_config=False, human_input_mode="NEVER"
+    )
+
+    dummy_messages = [  # two plain-text messages, neither contains a fenced code block
+        {
+            "content": "no code block",
+            "role": "user",
+        },
+        {
+            "content": "no code block",
+            "role": "user",
+        },
+    ]
+
+    code_message = {  # one message whose content is a single fenced python block
+        "content": '```python\nprint("hello world")\n```',
+        "role": "user",
+    }
+
+    # scenario 1: code execution is disabled (config=False) -> expect (False, None)
+    assert agent.generate_code_execution_reply(dummy_messages, config=False) == (False, None)
+
+    # scenario 2: a config dict is given but no message contains a code block -> expect (False, None)
+    assert agent.generate_code_execution_reply(dummy_messages, config={}) == (False, None)
+
+    # scenario 3: a code block exists but is the 3rd message from the end while last_n_messages is 1 -> expect (False, None)
+    assert agent.generate_code_execution_reply([code_message] + dummy_messages, config={"last_n_messages": 1}) == (
+        False,
+        None,
+    )
+
+    # scenario 4: last_n_messages is 3, so the code block is in range -> expect (True, exit code and output of the executed block)
+    agent._code_execution_config = {"last_n_messages": 3, "use_docker": False}
+    assert agent.generate_code_execution_reply([code_message] + dummy_messages) == (
+        True,
+        "exitcode: 0 (execution succeeded)\nCode output: \nhello world\n",
+    )
+    assert agent._code_execution_config["last_n_messages"] == 3  # the popped "last_n_messages" entry must be restored
+
+
 def test_max_consecutive_auto_reply():
     agent = ConversableAgent("a0", max_consecutive_auto_reply=2, llm_config=False, human_input_mode="NEVER")
     agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, human_input_mode="NEVER")
diff --git a/test/test_code.py b/test/test_code.py
index 3c967f65c5b6..06c6f9fa1cd0 100644
--- a/test/test_code.py
+++ b/test/test_code.py
@@ -153,6 +153,10 @@ def test_infer_lang():
     assert infer_lang("print('hello world')") == "python"
     assert infer_lang("pip install autogen") == "sh"
 
+    # test infer lang for unknown code/invalid code
+    assert infer_lang("dummy text") == UNKNOWN
+    assert infer_lang("print('hello world'))") == UNKNOWN
+
 
 def test_extract_code():
     print(extract_code("```bash\npython temp.py\n```"))