diff --git a/.gitignore b/.gitignore
index d77a5b43ffc..4375c147bf2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,7 @@
 **/*.tar.gz
 **/playground
 **/wandb
-
+**/tensorboard_log
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/recipe/grpo/grpo_ray_trainer.py b/recipe/grpo/grpo_ray_trainer.py
new file mode 100644
index 00000000000..c2c00a4ccb8
--- /dev/null
+++ b/recipe/grpo/grpo_ray_trainer.py
@@ -0,0 +1,314 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+FSDP PPO Trainer with Ray-based single controller.
+This trainer supports model-agnostic model initialization with huggingface
+"""
+import ray
+import time
+import uuid
+from collections import defaultdict
+from copy import deepcopy
+from pprint import pprint
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+from verl import DataProto
+from verl.trainer.ppo.core_algos import agg_loss
+from verl.trainer.ppo.metric_utils import (
+    compute_data_metrics,
+    compute_throughout_metrics,
+    compute_timing_metrics,
+    reduce_metrics,
+)
+from verl.trainer.ppo.ray_trainer import AdvantageEstimator, RayPPOTrainer, _timer, apply_kl_penalty, compute_advantage, compute_response_mask
+from verl.trainer.ppo.reward import compute_reward, compute_reward_async
+
+
+class RayGRPOTrainer(RayPPOTrainer):
+    def fit(self):
+        """
+        Run the training loop: optional initial validation, then for every batch rollout generation,
+        reward, old/ref log-prob and value computation, advantage estimation and critic/actor updates.
+        The driver process only calls the compute functions of the worker groups through RPC to build
+        the dataflow; the light-weight advantage computation is done on the driver process itself.
+        """
+        from omegaconf import OmegaConf
+
+        from verl.utils.tracking import Tracking
+
+        logger = Tracking(
+            project_name=self.config.trainer.project_name,
+            experiment_name=self.config.trainer.experiment_name,
+            default_backend=self.config.trainer.logger,
+            config=OmegaConf.to_container(self.config, resolve=True),
+        )
+
+        self.global_steps = 0
+
+        # load checkpoint before doing anything
+        self._load_checkpoint()
+
+        # perform validation before training
+        # currently, we only support validation using the reward_function.
+        if self.val_reward_fn is not None and self.config.trainer.get("val_before_train", True):
+            val_metrics = self._validate()
+            assert val_metrics, f"{val_metrics=}"
+            pprint(f"Initial validation metrics: {val_metrics}")
+            logger.log(data=val_metrics, step=self.global_steps)
+            if self.config.trainer.get("val_only", False):
+                return
+
+        # progress bar over total_training_steps, starting at the current global step
+        progress_bar = tqdm(total=self.total_training_steps, initial=self.global_steps, desc="Training Progress")
+
+        # we start from step 1
+        self.global_steps += 1
+        last_val_metrics = None
+
+        for epoch in range(self.config.trainer.total_epochs):
+            for batch_dict in self.train_dataloader:
+                metrics = {}
+                timing_raw = {}
+                batch: DataProto = DataProto.from_single_dict(batch_dict)
+
+                # move the generation inputs out of `batch` into a separate `gen_batch`
+                batch_keys_to_pop = ["input_ids", "attention_mask", "position_ids"]
+                non_tensor_batch_keys_to_pop = ["raw_prompt_ids"]
+                if "multi_modal_data" in batch.non_tensor_batch:
+                    non_tensor_batch_keys_to_pop.append("multi_modal_data")
+                if "raw_prompt" in batch.non_tensor_batch:
+                    non_tensor_batch_keys_to_pop.append("raw_prompt")
+                if "tools_kwargs" in batch.non_tensor_batch:
+                    non_tensor_batch_keys_to_pop.append("tools_kwargs")
+                gen_batch = batch.pop(
+                    batch_keys=batch_keys_to_pop,
+                    non_tensor_batch_keys=non_tensor_batch_keys_to_pop,
+                )
+
+                is_last_step = self.global_steps >= self.total_training_steps  # evaluated before the step; gates the final validation/checkpoint and the return below
+
+                with _timer("step", timing_raw):
+                    # generate a batch
+                    with _timer("gen", timing_raw):
+                        if not self.async_rollout_mode:
+                            gen_batch_output = self.actor_rollout_wg.generate_sequences(gen_batch)
+                        else:
+                            self.async_rollout_manager.wake_up()
+                            gen_batch_output = self.async_rollout_manager.generate_sequences(gen_batch)
+                            self.async_rollout_manager.sleep()
+
+                    if self.config.algorithm.adv_estimator == AdvantageEstimator.REMAX:  # extra do_sample=False rollout; its summed reward becomes "reward_baselines"
+                        with _timer("gen_max", timing_raw):
+                            gen_baseline_batch = deepcopy(gen_batch)
+                            gen_baseline_batch.meta_info["do_sample"] = False
+                            gen_baseline_output = self.actor_rollout_wg.generate_sequences(gen_baseline_batch)
+
+                            batch = batch.union(gen_baseline_output)
+                            reward_baseline_tensor = self.reward_fn(batch)
+                            reward_baseline_tensor = reward_baseline_tensor.sum(dim=-1)
+
+                            batch.pop(batch_keys=list(gen_baseline_output.batch.keys()))
+
+                            batch.batch["reward_baselines"] = reward_baseline_tensor
+
+                            del gen_baseline_batch, gen_baseline_output
+
+                    batch.non_tensor_batch["uid"] = np.array([str(uuid.uuid4()) for _ in range(len(batch.batch))], dtype=object)
+                    # repeat each prompt row rollout.n times (interleaved) to align with the repeated responses in rollout
+                    batch = batch.repeat(repeat_times=self.config.actor_rollout_ref.rollout.n, interleave=True)
+                    batch = batch.union(gen_batch_output)
+
+                    batch.batch["response_mask"] = compute_response_mask(batch)
+                    # Balance the number of valid tokens across DP ranks.
+                    # NOTE: This usually changes the order of data in the `batch`,
+                    # which won't affect the advantage calculation (since it's based on uid),
+                    # but might affect the loss calculation (due to the change of mini-batching).
+                    # TODO: Decouple the DP balancing and mini-batching.
+                    if self.config.trainer.balance_batch:
+                        self._balance_batch(batch, metrics=metrics)
+
+                    # per-sample number of attended tokens (attention_mask summed over the last dim)
+                    batch.meta_info["global_token_num"] = torch.sum(batch.batch["attention_mask"], dim=-1).tolist()
+
+                    with _timer("reward", timing_raw):
+                        # compute reward model score
+                        if self.use_rm:
+                            reward_tensor = self.rm_wg.compute_rm_score(batch)
+                            batch = batch.union(reward_tensor)
+
+                        if self.config.reward_model.launch_reward_fn_async:  # the future is collected with ray.get in the "adv" section below
+                            future_reward = compute_reward_async.remote(batch, self.config, self.tokenizer)
+                        else:
+                            reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.reward_fn)
+
+                    # recompute old_log_probs
+                    with _timer("old_log_prob", timing_raw):
+                        old_log_prob = self.actor_rollout_wg.compute_log_prob(batch)
+                        entropys = old_log_prob.batch["entropys"]
+                        response_masks = batch.batch["response_mask"]
+                        loss_agg_mode = self.config.actor_rollout_ref.actor.loss_agg_mode
+                        entropy_loss = agg_loss(loss_mat=entropys, loss_mask=response_masks, loss_agg_mode=loss_agg_mode)
+                        old_log_prob_metrics = {"actor/entropy_loss": entropy_loss.detach().item()}
+                        metrics.update(old_log_prob_metrics)
+                        old_log_prob.batch.pop("entropys")  # entropy is only logged as a metric; drop it before the union
+                        batch = batch.union(old_log_prob)
+
+                        if "rollout_log_probs" in batch.batch.keys():
+                            # TODO: we may want to add diff of probs too.
+                            rollout_old_log_probs = batch.batch["rollout_log_probs"]
+                            actor_old_log_probs = batch.batch["old_log_probs"]
+                            attention_mask = batch.batch["attention_mask"]
+                            responses = batch.batch["responses"]
+                            response_length = responses.size(1)
+                            response_mask = attention_mask[:, -response_length:]
+
+                            rollout_probs = torch.exp(rollout_old_log_probs)
+                            actor_probs = torch.exp(actor_old_log_probs)
+                            rollout_probs_diff = torch.abs(rollout_probs - actor_probs)
+                            rollout_probs_diff = torch.masked_select(rollout_probs_diff, response_mask.bool())
+                            rollout_probs_diff_max = torch.max(rollout_probs_diff)
+                            rollout_probs_diff_mean = torch.mean(rollout_probs_diff)
+                            rollout_probs_diff_std = torch.std(rollout_probs_diff)
+                            metrics.update(
+                                {
+                                    "training/rollout_probs_diff_max": rollout_probs_diff_max.detach().item(),
+                                    "training/rollout_probs_diff_mean": rollout_probs_diff_mean.detach().item(),
+                                    "training/rollout_probs_diff_std": rollout_probs_diff_std.detach().item(),
+                                }
+                            )
+
+                    if self.use_reference_policy:
+                        # compute reference log_prob
+                        with _timer("ref", timing_raw):
+                            if not self.ref_in_actor:
+                                ref_log_prob = self.ref_policy_wg.compute_ref_log_prob(batch)
+                            else:
+                                ref_log_prob = self.actor_rollout_wg.compute_ref_log_prob(batch)
+                            batch = batch.union(ref_log_prob)
+
+                    # compute values
+                    if self.use_critic:
+                        with _timer("values", timing_raw):
+                            values = self.critic_wg.compute_values(batch)
+                            batch = batch.union(values)
+
+                    with _timer("adv", timing_raw):
+                        # we combine with rule-based rm
+                        reward_extra_infos_dict: dict[str, list]
+                        if self.config.reward_model.launch_reward_fn_async:
+                            reward_tensor, reward_extra_infos_dict = ray.get(future_reward)
+                        batch.batch["token_level_scores"] = reward_tensor
+
+                        print(f"{list(reward_extra_infos_dict.keys())=}")
+                        if reward_extra_infos_dict:
+                            batch.non_tensor_batch.update({k: np.array(v) for k, v in reward_extra_infos_dict.items()})
+                            metrics.update({  # only keys containing "_sub" are turned into mean/max/min metrics
+                                **{f"critic/rewards/{k}/mean": np.mean(v) for k, v in reward_extra_infos_dict.items() if '_sub' in k},
+                                **{f"critic/rewards/{k}/max": np.max(v) for k, v in reward_extra_infos_dict.items() if '_sub' in k},
+                                **{f"critic/rewards/{k}/min": np.min(v) for k, v in reward_extra_infos_dict.items() if '_sub' in k},
+                            })
+                        # compute rewards. apply_kl_penalty if available
+                        if self.config.algorithm.use_kl_in_reward:
+                            batch, kl_metrics = apply_kl_penalty(batch, kl_ctrl=self.kl_ctrl_in_reward, kl_penalty=self.config.algorithm.kl_penalty)
+                            metrics.update(kl_metrics)
+                        else:
+                            batch.batch["token_level_rewards"] = batch.batch["token_level_scores"]
+
+                        # compute advantages, executed on the driver process
+
+                        norm_adv_by_std_in_grpo = self.config.algorithm.get("norm_adv_by_std_in_grpo", True)  # GRPO adv normalization factor
+
+                        batch = compute_advantage(
+                            batch,
+                            adv_estimator=self.config.algorithm.adv_estimator,
+                            gamma=self.config.algorithm.gamma,
+                            lam=self.config.algorithm.lam,
+                            num_repeat=self.config.actor_rollout_ref.rollout.n,
+                            norm_adv_by_std_in_grpo=norm_adv_by_std_in_grpo,
+                            multi_turn=self.config.actor_rollout_ref.rollout.multi_turn.enable,
+                            use_pf_ppo=self.config.algorithm.use_pf_ppo,
+                            pf_ppo_reweight_method=self.config.algorithm.pf_ppo.reweight_method,
+                            pf_ppo_weight_pow=self.config.algorithm.pf_ppo.weight_pow,
+                        )
+
+                    # update critic
+                    if self.use_critic:
+                        with _timer("update_critic", timing_raw):
+                            critic_output = self.critic_wg.update_critic(batch)
+                        critic_output_metrics = reduce_metrics(critic_output.meta_info["metrics"])
+                        metrics.update(critic_output_metrics)
+
+                    # critic warmup: the actor is only updated once global_steps has reached critic_warmup
+                    if self.config.trainer.critic_warmup <= self.global_steps:
+                        # update actor
+                        with _timer("update_actor", timing_raw):
+                            batch.meta_info["multi_turn"] = self.config.actor_rollout_ref.rollout.multi_turn.enable
+                            actor_output = self.actor_rollout_wg.update_actor(batch)
+                        actor_output_metrics = reduce_metrics(actor_output.meta_info["metrics"])
+                        metrics.update(actor_output_metrics)
+
+                    # Log rollout generations if enabled
+                    rollout_data_dir = self.config.trainer.get("rollout_data_dir", None)
+                    if rollout_data_dir:
+                        with _timer("dump_rollout_generations", timing_raw):
+                            print(batch.batch.keys())
+                            inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
+                            outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
+                            scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
+                            self._dump_generations(
+                                inputs=inputs,
+                                outputs=outputs,
+                                scores=scores,
+                                reward_extra_infos_dict=reward_extra_infos_dict,
+                                dump_path=rollout_data_dir,
+                            )
+
+                    # validate every test_freq steps and always on the last step
+                    if self.val_reward_fn is not None and self.config.trainer.test_freq > 0 and (is_last_step or self.global_steps % self.config.trainer.test_freq == 0):
+                        with _timer("testing", timing_raw):
+                            val_metrics: dict = self._validate()
+                            if is_last_step:
+                                last_val_metrics = val_metrics
+                        metrics.update(val_metrics)
+
+                    if self.config.trainer.save_freq > 0 and (is_last_step or self.global_steps % self.config.trainer.save_freq == 0):
+                        with _timer("save_checkpoint", timing_raw):
+                            self._save_checkpoint()
+
+                # training metrics
+                metrics.update(
+                    {
+                        "training/global_step": self.global_steps,
+                        "training/epoch": epoch,
+                    }
+                )
+                # collect metrics
+                metrics.update(compute_data_metrics(batch=batch, use_critic=self.use_critic))
+                metrics.update(compute_timing_metrics(batch=batch, timing_raw=timing_raw))
+                # TODO: implement actual tflpo and theoretical tflpo
+                n_gpus = self.resource_pool_manager.get_n_gpus()
+                metrics.update(compute_throughout_metrics(batch=batch, timing_raw=timing_raw, n_gpus=n_gpus))
+
+                # TODO: make a canonical logger that supports various backend
+                logger.log(data=metrics, step=self.global_steps)
+
+                progress_bar.update(1)
+                self.global_steps += 1
+                if is_last_step:
+                    pprint(f"Final validation metrics: {last_val_metrics}")
+                    progress_bar.close()
+                    return
\ No newline at end of file
diff --git a/recipe/grpo/main_grpo.py b/recipe/grpo/main_grpo.py
new file mode 100644
index 00000000000..97d8eae0745
--- /dev/null
+++ b/recipe/grpo/main_grpo.py
@@ -0,0 +1,265 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Note that main is kept separate from ray_trainer because ray_trainer is also used by other main scripts.
+"""
+
+import hydra
+import ray
+
+import pandas as pd
+from .grpo_ray_trainer import RayGRPOTrainer
+from verl.trainer.ppo.reward import load_reward_manager
+from torch.utils.data import Dataset
+from verl.utils.dataset.rl_dataset import RLHFDataset as OriginalRLHFDataset
+
+
+class RLHFDataset(OriginalRLHFDataset):
+    """RLHFDataset variant that loads parquet/json/jsonl files into one pandas DataFrame."""
+
+    def _read_files_and_tokenize(self):
+        """Load ``self.data_files``, optionally prepend a system prompt, and drop overlong prompts.
+
+        Raises:
+            ValueError: if a data file has an unsupported extension.
+        """
+        dataframes = []
+        for data_file in self.data_files:
+            if data_file.endswith("parquet"):
+                dataframe = pd.read_parquet(data_file)
+            elif data_file.endswith("jsonl"):
+                # read in chunks of 10000 lines, then stitch the chunks back together
+                dataframe = pd.concat(pd.read_json(data_file, lines=True, chunksize=10000), ignore_index=True)
+            elif data_file.endswith("json"):
+                dataframe = pd.read_json(data_file)
+            else:
+                # a bare `raise` has no active exception here and would only yield an opaque RuntimeError
+                raise ValueError(f"Unsupported data file (expected parquet/json/jsonl): {data_file}")
+            dataframes.append(dataframe)
+        self.dataframe = pd.concat(dataframes)
+        print(f"dataset len: {len(self.dataframe)}")
+
+        # `create_rl_dataset` passes `config.data` as `config`, so fall back to `self.config` when it has no nested `data`
+        data_cfg = self.config.get("data", None) or self.config
+        system_prompt = data_cfg.get("system_prompt", None)
+        if system_prompt is not None:
+            # list(x): parquet list columns load as numpy arrays, and `list + ndarray` is not concatenation
+            self.dataframe[self.prompt_key] = self.dataframe[self.prompt_key].apply(
+                lambda x: [{"role": "system", "content": system_prompt}] + list(x)
+            )
+        # filter out too long prompts
+        if self.filter_overlong_prompts:
+            tokenizer, max_len = self.tokenizer, self.max_prompt_length
+            # pandas' DataFrame.filter selects labels and takes no predicate/num_proc/desc, so use a boolean mask
+            keep = self.dataframe[self.prompt_key].apply(
+                lambda prompt: len(tokenizer.apply_chat_template(list(prompt), add_generation_prompt=True)) <= max_len
+            )
+            self.dataframe = self.dataframe[keep]
+            print(f"filter dataset len: {len(self.dataframe)}")
+
+@hydra.main(config_path="config", config_name="ppo_trainer", version_base=None)
+def main(config):  # Hydra entry point: receives the composed config and hands it to run_grpo
+    run_grpo(config)
+
+
+def run_grpo(config) -> None:
+    """Start Ray if it is not initialized, run ``TaskRunner.run`` remotely, then optionally dump a timeline."""
+    if not ray.is_initialized():
+        # environment variables handed to Ray's runtime_env for the local cluster started here
+        worker_env = {"TOKENIZERS_PARALLELISM": "true", "NCCL_DEBUG": "WARN", "VLLM_LOGGING_LEVEL": "WARN", "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "true"}
+        ray.init(runtime_env={"env_vars": worker_env}, num_cpus=config.ray_init.num_cpus)
+
+    # block until the remote training task has finished
+    task_runner = TaskRunner.remote()
+    ray.get(task_runner.run.remote(config))
+    # optionally write a timeline trace file for performance analysis
+    trace_path = config.ray_init.get("timeline_json_file", None)
+    if trace_path:
+        ray.timeline(filename=trace_path)
+
+
+@ray.remote(num_cpus=1)  # please make sure main_task is not scheduled on head
+class TaskRunner:
+    """Ray actor that builds the tokenizer, worker mapping, reward functions and datasets, then trains."""
+    def run(self, config):
+        """Resolve ``config``, assemble every trainer dependency and run ``RayGRPOTrainer.fit()``."""
+        # print initial config
+        from pprint import pprint
+        from omegaconf import OmegaConf
+        from verl.utils.fs import copy_to_local
+
+        pprint(OmegaConf.to_container(config, resolve=True))  # resolve=True will eval symbol values
+        OmegaConf.resolve(config)
+
+        # download the checkpoint from hdfs
+        local_path = copy_to_local(config.actor_rollout_ref.model.path, use_shm=config.actor_rollout_ref.model.get("use_shm", False))
+
+        # instantiate tokenizer
+        from verl.utils import hf_processor, hf_tokenizer
+
+        trust_remote_code = config.data.get("trust_remote_code", False)
+        tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code)
+        processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True)  # used for multimodal LLM, could be none
+
+        # vllm early verify
+        if config.actor_rollout_ref.rollout.name in ["vllm"]:
+            from verl.utils.vllm_utils import is_version_ge
+
+            if config.actor_rollout_ref.model.get("lora_rank", 0) > 0:
+                if not is_version_ge(pkg="vllm", minver="0.7.3"):
+                    raise NotImplementedError("PPO LoRA is not supported before vllm 0.7.3")
+
+        # define worker classes
+        if config.actor_rollout_ref.actor.strategy in ["fsdp", "fsdp2"]:
+            assert config.critic.strategy in ["fsdp", "fsdp2"]
+            from verl.single_controller.ray import RayWorkerGroup
+            from verl.workers.fsdp_workers import ActorRolloutRefWorker, AsyncActorRolloutRefWorker, CriticWorker
+
+            actor_rollout_cls = AsyncActorRolloutRefWorker if config.actor_rollout_ref.rollout.mode == "async" else ActorRolloutRefWorker
+            ray_worker_group_cls = RayWorkerGroup
+
+        elif config.actor_rollout_ref.actor.strategy == "megatron":
+            assert config.actor_rollout_ref.actor.strategy == config.critic.strategy
+            from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup
+            from verl.workers.megatron_workers import ActorRolloutRefWorker, AsyncActorRolloutRefWorker, CriticWorker
+
+            actor_rollout_cls = AsyncActorRolloutRefWorker if config.actor_rollout_ref.rollout.mode == "async" else ActorRolloutRefWorker
+            ray_worker_group_cls = NVMegatronRayWorkerGroup
+
+        else:
+            raise NotImplementedError
+
+        from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role
+
+        role_worker_mapping = {
+            Role.ActorRollout: ray.remote(actor_rollout_cls),
+            Role.Critic: ray.remote(CriticWorker),
+        }
+
+        global_pool_id = "global_pool"
+        resource_pool_spec = {
+            global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes,
+        }
+        mapping = {
+            Role.ActorRollout: global_pool_id,
+            Role.Critic: global_pool_id,
+        }
+
+        # we should adopt a multi-source reward function here
+        # - for rule-based rm, we directly call a reward score
+        # - for model-based rm, we call a model
+        # - for code related prompt, we send to a sandbox if there are test cases
+        # - finally, we combine all the rewards together
+        # - The reward type depends on the tag of the data
+        if config.reward_model.enable:
+            if config.reward_model.strategy in ["fsdp", "fsdp2"]:
+                from verl.workers.fsdp_workers import RewardModelWorker
+            elif config.reward_model.strategy == "megatron":
+                from verl.workers.megatron_workers import RewardModelWorker
+            else:
+                raise NotImplementedError
+            role_worker_mapping[Role.RewardModel] = ray.remote(RewardModelWorker)
+            mapping[Role.RewardModel] = global_pool_id
+
+        # use reference model
+        if config.algorithm.use_kl_in_reward or config.actor_rollout_ref.actor.use_kl_loss:
+            role_worker_mapping[Role.RefPolicy] = ray.remote(ActorRolloutRefWorker)
+            mapping[Role.RefPolicy] = global_pool_id
+
+        reward_fn = load_reward_manager(config, tokenizer, num_examine=0, **config.reward_model.get("reward_kwargs", {}))
+        val_reward_fn = load_reward_manager(config, tokenizer, num_examine=1, **config.reward_model.get("reward_kwargs", {}))
+        resource_pool_manager = ResourcePoolManager(resource_pool_spec=resource_pool_spec, mapping=mapping)
+
+        from verl.utils.dataset.rl_dataset import collate_fn
+
+        train_dataset = create_rl_dataset(config.data.train_files, config.data, tokenizer, processor)
+        val_dataset = create_rl_dataset(config.data.val_files, config.data, tokenizer, processor)
+        train_sampler = create_rl_sampler(config.data, train_dataset)
+        trainer = RayGRPOTrainer(  # the recipe's trainer; RayPPOTrainer is never imported in this module
+            config=config,
+            tokenizer=tokenizer,
+            processor=processor,
+            role_worker_mapping=role_worker_mapping,
+            resource_pool_manager=resource_pool_manager,
+            ray_worker_group_cls=ray_worker_group_cls,
+            reward_fn=reward_fn,
+            val_reward_fn=val_reward_fn,
+            train_dataset=train_dataset,
+            val_dataset=val_dataset,
+            collate_fn=collate_fn,
+            train_sampler=train_sampler,
+            device_name=config.trainer.device,
+        )
+        trainer.init_workers()
+        trainer.fit()
+
+
+def create_rl_dataset(data_paths, data_config, tokenizer, processor):
+    """Instantiate the RL dataset, using a user-supplied class when one is configured.
+
+    Arguments:
+        data_paths: Data file path(s), forwarded to the dataset class as ``data_files``.
+        data_config: The data config; ``custom_cls.path``/``custom_cls.name`` select a custom class.
+        tokenizer (Tokenizer): The tokenizer.
+        processor (Processor): The processor.
+
+    Returns:
+        dataset (Dataset): The dataset.
+
+    Raises:
+        TypeError: If the custom class does not inherit from ``torch.utils.data.Dataset``.
+    """
+    dataset_cls = RLHFDataset
+    has_custom_cls = "custom_cls" in data_config and data_config.custom_cls.get("path", None) is not None
+    if has_custom_cls:
+        from verl.utils.import_utils import load_extern_type
+        dataset_cls = load_extern_type(data_config.custom_cls.path, data_config.custom_cls.name)
+        if not issubclass(dataset_cls, Dataset):
+            raise TypeError(f"The custom dataset class '{data_config.custom_cls.name}' from '{data_config.custom_cls.path}' must inherit from torch.utils.data.Dataset")
+    print(f"Using dataset class: {dataset_cls.__name__}")
+
+    return dataset_cls(
+        data_files=data_paths,
+        tokenizer=tokenizer,
+        processor=processor,
+        config=data_config,
+    )
+
+
+def create_rl_sampler(data_config, dataset):
+    """Build the sampler used to iterate over the dataset.
+
+    Arguments:
+        data_config: The data config; ``shuffle`` picks the sampler type and ``seed`` (default 1) seeds shuffling.
+        dataset (Dataset): The dataset.
+
+    Returns:
+        sampler (Sampler): A seeded ``RandomSampler`` when shuffling, otherwise a ``SequentialSampler``.
+    """
+    import torch
+    from torch.utils.data import RandomSampler, SequentialSampler
+
+    # without shuffling no generator is needed
+    if not data_config.shuffle:
+        return SequentialSampler(data_source=dataset)
+
+    # use sampler for better ckpt resume: the shuffle order comes from an explicitly seeded generator
+    shuffle_rng = torch.Generator()
+    shuffle_rng.manual_seed(data_config.get("seed", 1))
+    sampler = RandomSampler(data_source=dataset, generator=shuffle_rng)
+    return sampler
+
+
+if __name__ == "__main__":
+    main()  # called without arguments: the hydra.main decorator supplies `config`
diff --git a/recipe/grpo/scripts/run_grpo_debug.sh b/recipe/grpo/scripts/run_grpo_debug.sh
new file mode 100644
index 00000000000..54ad2674ffc
--- /dev/null
+++ b/recipe/grpo/scripts/run_grpo_debug.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+set -x
+# With vllm<=0.6.3 you might need `export VLLM_ATTENTION_BACKEND=XFORMERS` to avoid bugs.
+# NOTE(review): this launches verl.trainer.main_ppo, not recipe/grpo/main_grpo.py -- confirm this is intended.
+# The hydra.run.dir override is single-quoted so bash passes the ${...} interpolations through to Hydra verbatim.
+python3 -m verl.trainer.main_ppo \
+    '++hydra.run.dir=outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}-${env:RANK,0}' \
+    algorithm.adv_estimator=grpo \
+    data.train_files=/cpfs/user/liuyanjiang/hf_datasets/DeepScaleR-Preview-Dataset/deepscaler.parquet \
+    data.val_files=/cpfs/user/liuyanjiang/hf_datasets/DeepScaleR-Preview-Dataset/deepscaler.parquet \
+    data.train_batch_size=512 \
+    data.max_prompt_length=512 \
+    data.max_response_length=16384 \
+    data.filter_overlong_prompts=True \
+    data.truncation='error' \
+    data.system_prompt='A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>' \
+    actor_rollout_ref.model.path=/cpfs/user/liuyanjiang/hf_models/Qwen2.5-1.5B-Instruct \
+    actor_rollout_ref.actor.optim.lr=1e-6 \
+    actor_rollout_ref.model.use_remove_padding=True \
+    actor_rollout_ref.actor.ppo_mini_batch_size=512 \
+    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=40 \
+    actor_rollout_ref.actor.use_kl_loss=True \
+    actor_rollout_ref.actor.kl_loss_coef=0.001 \
+    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
+    actor_rollout_ref.actor.entropy_coeff=0 \
+    actor_rollout_ref.model.enable_gradient_checkpointing=True \
+    actor_rollout_ref.actor.fsdp_config.param_offload=False \
+    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
+    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=40 \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
+    actor_rollout_ref.rollout.name=vllm \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
+    actor_rollout_ref.rollout.n=8 \
+    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=40 \
+    actor_rollout_ref.ref.fsdp_config.param_offload=True \
+    custom_reward_function.path=verl.custom_reward_functions.grpo_custom_reward_function \
+    algorithm.use_kl_in_reward=False \
+    trainer.critic_warmup=0 \
+    trainer.logger=['console','tensorboard'] \
+    trainer.project_name='grpo' \
+    trainer.experiment_name='debug' \
+    trainer.n_gpus_per_node=8 \
+    trainer.nnodes=1 \
+    trainer.default_local_dir=/newcpfs/user/liuyanjiang/ckpts \
+    trainer.save_freq=20 \
+    trainer.test_freq=5 \
+    trainer.total_epochs=15 "$@"
\ No newline at end of file
diff --git a/recipe/moe/moe_trainer/__init__.py b/recipe/moe/moe_trainer/__init__.py
new file mode 100644
index 00000000000..1ce90c5eb35
--- /dev/null
+++ b/recipe/moe/moe_trainer/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/recipe/moe/moe_trainer/bitdump.py b/recipe/moe/moe_trainer/bitdump.py
new file mode 100644
index 00000000000..96116c1266c
--- /dev/null
+++ b/recipe/moe/moe_trainer/bitdump.py
@@ -0,0 +1,132 @@
+# from dill import pickle as dill_pickle
+import copyreg
+import logging
+import os
+from collections import OrderedDict
+from contextlib import contextmanager
+from copy import copy, deepcopy
+
+import dill
+import dill.settings
+import torch
+import torch.distributed
+
+
+
+class NoneReducer:  # reducer pair for copyreg: a registered object is pickled as a call that yields None
+    @staticmethod
+    def reduce(pg):  # pg: the object being pickled; its state is deliberately discarded
+        return (NoneReducer.rebuild, (None,))
+
+    @staticmethod
+    def rebuild(state):  # invoked at unpickle time with the single None argument packed by reduce()
+        return None
+
+
+copyreg.pickle(torch._C._distributed_c10d.ProcessGroup, NoneReducer.reduce)  # each type registered here is pickled as None
+copyreg.pickle(torch.cuda.Stream, NoneReducer.reduce)
+copyreg.pickle(torch._C._functions.AccumulateGrad, NoneReducer.reduce)
+copyreg.pickle(torch.cuda.Event, NoneReducer.reduce)
+copyreg.pickle(torch._C.DispatchKeySet, NoneReducer.reduce)
+
+# Module-level dill settings, assigned once at import time (see dill.settings for byref/recurse/ignore).
+dill.settings["byref"] = True
+dill.settings["recurse"] = True
+dill.settings["ignore"] = True
+
+
+def get_children_layers(model: torch.nn.Module, name=""):
+    """Return parallel lists ``(names, modules)`` for ``model`` and all of its descendants.
+
+    The first entry is ``(name, model)`` itself; descendants follow depth-first with
+    dot-joined names, prefixed by ``name`` unless ``name`` is the empty string.
+    """
+
+    # A leaf needs no special case: it simply contributes no children to the loop below.
+    output_names, output_children = [name], [model]
+    for child_name, child in model.named_children():
+        sub_names, sub_children = get_children_layers(child, child_name)
+        # sub_names are relative to ``model``; qualify them with this module's own name.
+        output_names.extend(f"{name}.{s}" if name != "" else s for s in sub_names)
+        output_children.extend(sub_children)
+
+    return output_names, output_children
+
+
+def remove_attrs(m, names):
+    """Delete from ``m`` every attribute listed in ``names`` that it currently has."""
+    for attr in filter(lambda candidate: hasattr(m, candidate), names):  # lazy: checked just before each delete
+        delattr(m, attr)
+
+
+def remove_hook_from_module(module: torch.nn.Module, recurse=False):  # restore ``forward`` from a saved ``_old_forward``
+    if hasattr(module, "_old_forward"):  # presumably set by whatever replaced forward earlier — verify against the wrapper
+        module.forward = module._old_forward
+        module.forward.__self__.forward = module._old_forward  # NOTE(review): needs _old_forward to expose __self__ (bound method) — confirm
+        delattr(module, "_old_forward")
+
+    if recurse:  # apply the same restoration to every descendant via direct children
+        for child in module.children():
+            remove_hook_from_module(child, recurse)
+
+
+mbs_ids = {}  # (module name, direction) -> number of dumps written so far; updated by the hooks from hook_fwd_bwd_to_module
+
+
+def hook_fwd_bwd_to_module(model: torch.nn.Module, names=None, prefix="", is_hf=False):
+    """Register forward and full-backward hooks that dump module inputs/outputs with ``torch.save``.
+
+    Args:
+        model: Root module whose submodules are hooked.
+        names: ``None`` hooks every module; otherwise a module name or list of names. A name
+            ending in ``*`` matches by prefix (``"a.*"`` also matches ``"a"`` itself).
+        prefix: String prepended to every dump path; if non-empty and missing it is also created as a directory.
+        is_hf: Accepted for interface compatibility; the hooks do not use it.
+
+    Each selected module is hooked once, even if several patterns match it.
+    """
+
+    def name_fn(name, direction="forward", is_hf=False):
+        def fn(module, input_features, output_features):
+            # Modules without a usable name (e.g. the root, named "") are never dumped.
+            if name is None or name in ("", " "):
+                return
+            node = torch._C._current_autograd_node()
+            print(f"===== dump {name} datas {node=}")
+            if prefix and not os.path.exists(prefix):
+                os.makedirs(prefix, exist_ok=True)
+
+            # Count dumps per (module, direction) so successive calls get distinct files.
+            key = (name, direction)
+            mbs_id = mbs_ids.setdefault(key, 0)
+            stem = f"{prefix}{name}-iter-mbs{mbs_id}-{direction}"
+            print(f"{stem}-input.pt")
+            torch.save(input_features, f"{stem}-input.pt", pickle_module=dill)
+            torch.save(output_features, f"{stem}-output.pt", pickle_module=dill)
+            mbs_ids[key] += 1
+
+        return fn
+
+    def is_selected(module_name, pattern):
+        """Exact match, or prefix match when ``pattern`` ends with ``*``."""
+        if pattern.endswith("*"):
+            return module_name.startswith(pattern[:-1]) or module_name == pattern[:-2]
+        return module_name == pattern
+
+    if isinstance(names, str):
+        names = [names]
+
+    all_names, _ = get_children_layers(model)
+    if names is None:
+        selected = all_names
+    else:
+        selected = [n for n in all_names if any(is_selected(n, t) for t in names)]
+
+    modules = dict(model.named_modules())
+    # dict.fromkeys drops repeated names (keeping order) so no module is hooked twice.
+    for name in dict.fromkeys(selected):
+        if name in modules:
+            modules[name].register_forward_hook(name_fn(name, is_hf=is_hf))
+            modules[name].register_full_backward_hook(
+                name_fn(name, "backward", is_hf=is_hf), prepend=True
+            )
diff --git a/recipe/moe/moe_trainer/config/ppo_megatron_trainer.yaml b/recipe/moe/moe_trainer/config/ppo_megatron_trainer.yaml
new file mode 100644
index 00000000000..ed51777ea7d
--- /dev/null
+++ b/recipe/moe/moe_trainer/config/ppo_megatron_trainer.yaml
@@ -0,0 +1,318 @@
+data:
+  tokenizer: null
+  train_files: ~/data/rlhf/gsm8k/train.parquet
+  val_files: ~/data/rlhf/gsm8k/test.parquet
+  prompt_key: prompt
+  reward_fn_key: data_source
+  max_prompt_length: 512
+  max_response_length: 512
+  train_batch_size: 1024
+  val_batch_size: null # DEPRECATED: Validation datasets are sent to inference engines as a whole batch, which will schedule the memory themselves
+  return_raw_input_ids: False  # This should be set to true when the tokenizer between policy and rm differs
+  return_raw_chat: False
+  return_full_prompt: False
+  shuffle: True
+  filter_overlong_prompts: False # for large-scale datasets, filtering overlong prompts can be time-consuming. You can set filter_overlong_prompts_workers to use multiprocessing to speed it up.
+  filter_overlong_prompts_workers: 1
+  truncation: error
+  trust_remote_code: False  # main_ppo will check this config to determine whether to use remote code for tokenizer
+  custom_cls:
+      path: null
+      name: null
+
+actor_rollout_ref:
+  hybrid_engine: True
+  model:
+    path: ~/models/deepseek-llm-7b-chat
+    external_lib: null
+    override_config:
+      model_config: {}
+      moe_config:
+        freeze_moe_router: False
+    enable_gradient_checkpointing: False
+    gradient_checkpointing_kwargs:
+      ## Activation Checkpointing
+      activations_checkpoint_method: null # 'uniform', 'block'; not used with 'selective'
+      # 'uniform' divides the total number of transformer layers and checkpoints the input activation of each chunk
+      # 'block' checkpoints the specified number of layers per pipeline stage at the specified granularity
+      activations_checkpoint_granularity: null # 'selective' or 'full'
+      # 'full' will checkpoint the entire transformer layer and 'selective' only checkpoints memory intensive part of attention
+      activations_checkpoint_num_layers: null # not used with 'selective'
+    trust_remote_code: False
+  actor:
+    strategy: megatron  # This is for backward-compatibility
+    ppo_mini_batch_size: 256
+    ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
+    ppo_micro_batch_size_per_gpu: null
+    use_dynamic_bsz: False
+    ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
+    use_torch_compile: True # False to disable torch compile
+    # pg_losses2 = -advantages * torch.clamp(ratio, 1 - cliprange_low, 1 + cliprange_high)
+    clip_ratio: 0.2 # default value if clip_ratio_low and clip_ratio_high are not specified
+    clip_ratio_low: 0.2
+    clip_ratio_high: 0.2
+    clip_ratio_c: 3.0 # lower bound of the value for Dual-clip PPO from https://arxiv.org/pdf/1912.09729
+    loss_agg_mode: "token-mean" # / "seq-mean-token-sum" / "seq-mean-token-mean"
+    # NOTE: "token-mean" is the default behavior
+    entropy_coeff: 0
+    use_kl_loss: False # True for GRPO
+    kl_loss_coef: 0.001 # for grpo
+    kl_loss_type: low_var_kl # for grpo
+    ppo_epochs: 1
+    data_loader_seed: null
+    shuffle: False
+    optim:
+      lr: 1e-6
+      clip_grad: 1.0
+      lr_warmup_steps: -1 # Prioritized. Negative values mean delegating to lr_warmup_steps_ratio.
+      lr_warmup_steps_ratio: 0.  # the total steps will be injected during runtime
+      min_lr_ratio: null   # only useful for warmup with cosine
+      warmup_style: constant  # select from constant/cosine
+      total_training_steps: -1  # must be overridden by the program
+      weight_decay: 0.01
+    megatron:
+      param_offload: False
+      grad_offload: False
+      optimizer_offload: False
+      tensor_model_parallel_size: 1
+      expert_model_parallel_size: 1
+      expert_tensor_parallel_size: null
+      pipeline_model_parallel_size: 1
+      virtual_pipeline_model_parallel_size: null # change VPP interface for parallelism tests
+      context_parallel_size: 1
+      sequence_parallel: True
+      use_distributed_optimizer: True
+      use_dist_checkpointing: False
+      dist_checkpointing_path: null
+      seed: 42
+      override_transformer_config: {} # additional transformer config like: num_layers_in_first(/last)_pipeline_stage
+    profile: # profile the actor model in `update_policy` 
+      use_profile: False # open it when you want to profile the actor model
+      profile_ranks: null # list, you can specify the ranks to profile
+      step_start: -1 # start step in update_policy 
+      step_end: -1 # end step 
+      save_path: null # the path to save the profile result
+    load_weight: True
+    checkpoint:
+      contents: ['model', 'optimizer', 'extra']  # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
+  ref:
+    strategy: megatron
+    use_torch_compile: ${actor_rollout_ref.actor.use_torch_compile}
+    megatron:
+      param_offload: False
+      tensor_model_parallel_size: 1
+      expert_model_parallel_size: 1
+      expert_tensor_parallel_size: null # YAML null; a bare `None` would be loaded as the string "None"
+      pipeline_model_parallel_size: 1
+      virtual_pipeline_model_parallel_size: null # change VPP interface for parallelism tests
+      context_parallel_size: 1
+      sequence_parallel: True
+      use_distributed_optimizer: False
+      use_dist_checkpointing: False
+      dist_checkpointing_path: null
+      seed: ${actor_rollout_ref.actor.megatron.seed}
+      override_transformer_config: ${actor_rollout_ref.actor.megatron.override_transformer_config}
+    profile:
+      use_profile: False
+      profile_ranks: null
+      step_start: -1
+      step_end: -1
+      save_path: null
+    load_weight: True
+    log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
+    log_prob_micro_batch_size_per_gpu: null
+    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
+  rollout:
+    name: vllm
+    mode: sync # sync: LLM, async: AsyncLLM
+    temperature: 1.0
+    top_k: -1 # 0 for hf rollout, -1 for vllm rollout
+    top_p: 1
+    prompt_length: ${data.max_prompt_length}  # for xperf_gpt
+    response_length: ${data.max_response_length}
+    # for vllm rollout
+    dtype: bfloat16 # should align with FSDP
+    gpu_memory_utilization: 0.5
+    ignore_eos: False
+    enforce_eager: True
+    free_cache_engine: True
+    load_format: dummy_megatron
+    tensor_model_parallel_size: 1
+    max_num_batched_tokens: 8192
+    max_model_len: null
+    max_num_seqs: 1024
+    log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
+    log_prob_micro_batch_size_per_gpu: null
+    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
+    disable_log_stats: True
+    enable_chunked_prefill: False # could get higher throughput
+    # for hf rollout
+    do_sample: True
+    layer_name_map:
+      qkv_layer_name: qkv
+      gate_proj_layer_name: gate_up
+    # number of responses (i.e. num sample times)
+    n: 1
+    engine_kwargs: # inference engine parameters
+      vllm:
+        swap_space: null # null means "use the engine default value" (usually 4 GB), setting it to, e.g., 32 means 32 GB
+      sglang:
+        attention_backend: null # null means use the engine default value, available options: flashinfer, triton, flashmla
+    val_kwargs:
+      # sampling parameters for validation
+      top_k: -1 # 0 for hf rollout, -1 for vllm rollout
+      top_p: 1.0
+      temperature: 0
+      n: 1
+      do_sample: False # default eager for validation
+    multi_turn: 
+      enable: False  # set to True for multi-turn tool interaction tasks; should set rollout.name to sglang as well
+      max_turns: null  # null for no limit (default max_length // 3)
+      tool_config_path: null  # null for no tool
+      format: chatml  # chatml, more formats will be supported in the future
+
+critic:
+  rollout_n: ${actor_rollout_ref.rollout.n}
+  strategy: megatron
+  optim:
+    lr: 1e-5
+    clip_grad: 1.0
+    lr_warmup_steps_ratio: 0.  # the total steps will be injected during runtime
+    min_lr_ratio: null   # only useful for warmup with cosine
+    warmup_style: constant  # select from constant/cosine
+    total_training_steps: -1  # must be overridden by the program
+    weight_decay: 0.01
+  model:
+    path: ~/models/deepseek-llm-7b-chat
+    tokenizer_path: ${actor_rollout_ref.model.path}
+    override_config:
+      model_config: {}
+      moe_config:
+        freeze_moe_router: False
+    external_lib: ${actor_rollout_ref.model.external_lib}
+    trust_remote_code: False
+    enable_gradient_checkpointing: False
+    gradient_checkpointing_kwargs:
+      ## Activation Checkpointing
+      activations_checkpoint_method: null
+      activations_checkpoint_granularity: null
+      activations_checkpoint_num_layers: null
+  megatron:
+    param_offload: False
+    grad_offload: False
+    optimizer_offload: False
+    tensor_model_parallel_size: 1
+    expert_model_parallel_size: 1
+    expert_tensor_parallel_size: null
+    pipeline_model_parallel_size: 1
+    virtual_pipeline_model_parallel_size: null # change VPP interface for parallelism tests
+    context_parallel_size: 1
+    sequence_parallel: True
+    use_distributed_optimizer: True
+    use_dist_checkpointing: False
+    dist_checkpointing_path: null
+    seed: ${actor_rollout_ref.actor.megatron.seed}
+    override_transformer_config: ${actor_rollout_ref.actor.megatron.override_transformer_config}
+  load_weight: True
+  ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
+  ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
+  ppo_micro_batch_size_per_gpu: null
+  use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+  ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
+  forward_max_token_len_per_gpu: ${critic.ppo_max_token_len_per_gpu}
+  ppo_epochs: ${actor_rollout_ref.actor.ppo_epochs}
+  data_loader_seed: ${actor_rollout_ref.actor.data_loader_seed}
+  shuffle: ${actor_rollout_ref.actor.shuffle}
+  cliprange_value: 0.5
+  kl_ctrl:
+    type: fixed
+    kl_coef: 0.001
+  loss_agg_mode: ${actor_rollout_ref.actor.loss_agg_mode}
+  checkpoint:
+    contents: ['model', 'optimizer', 'extra']  # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
+
+reward_model:
+  enable: False
+  strategy: megatron
+  megatron:
+    param_offload: False
+    tensor_model_parallel_size: 1
+    expert_model_parallel_size: 1
+    expert_tensor_parallel_size: null
+    pipeline_model_parallel_size: 1
+    virtual_pipeline_model_parallel_size: null # change VPP interface for parallelism tests
+    context_parallel_size: 1
+    sequence_parallel: True
+    use_distributed_optimizer: False
+    use_dist_checkpointing: False
+    dist_checkpointing_path: null
+    seed: ${actor_rollout_ref.actor.megatron.seed}
+    override_transformer_config: {}
+  model:
+    input_tokenizer: ${actor_rollout_ref.model.path}  # set this to null if the chat template is identical
+    path: ~/models/FsfairX-LLaMA3-RM-v0.1
+    trust_remote_code: False
+    external_lib: ${actor_rollout_ref.model.external_lib}
+  load_weight: True
+  micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
+  micro_batch_size_per_gpu: null
+  use_dynamic_bsz: ${critic.use_dynamic_bsz}
+  forward_max_token_len_per_gpu: ${critic.forward_max_token_len_per_gpu}
+  max_length: null
+  reward_manager: naive
+  launch_reward_fn_async: False # custom reward function executed async on CPU, during log_prob
+  sandbox_fusion:
+    url: null # faas url to run code in cloud sandbox
+    max_concurrent: 64 # max concurrent requests to sandbox
+custom_reward_function:
+  path: null
+  name: compute_score
+
+algorithm:
+  gamma: 1.0
+  lam: 1.0
+  adv_estimator: gae
+  norm_adv_by_std_in_grpo: True
+  use_kl_in_reward: False
+  kl_penalty: kl  # how to estimate kl divergence
+  kl_ctrl:
+    type: fixed
+    kl_coef: 0.001
+    horizon: 10000
+    target_kl: 0.1
+  use_pf_ppo: False
+  pf_ppo:
+    reweight_method: pow  # ["pow", "max_min", "max_random"]
+    weight_pow: 2.0
+
+trainer:
+  balance_batch: True
+  total_epochs: 30
+  total_training_steps: null
+  project_name: verl_examples
+  experiment_name: gsm8k
+  logger: ['console', 'wandb']
+  log_val_generations: 0
+  nnodes: 1
+  n_gpus_per_node: 8
+  save_freq: -1
+  # auto: find the last ckpt to resume. If can't find, start from scratch
+  resume_mode: auto # or disable or resume_path if resume_from_path is set
+  resume_from_path: null
+  del_local_ckpt_after_load: False
+  val_before_train: True
+  test_freq: -1
+  critic_warmup: 0
+  default_hdfs_dir: null
+  default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}
+  max_actor_ckpt_to_keep: null
+  max_critic_ckpt_to_keep: null
+  # The timeout for ray worker group to wait for the register center to be ready
+  ray_wait_register_center_timeout: 300
+  device: cuda
+
+ray_init:
+  num_cpus: null # null (`None`) means using all CPUs, which might cause a hang on resource-limited systems like SLURM; in that case, set this to an allowed number.
+  timeline_json_file: null
diff --git a/recipe/moe/moe_trainer/config/ppo_trainer.yaml b/recipe/moe/moe_trainer/config/ppo_trainer.yaml
new file mode 100644
index 00000000000..929ffd43e67
--- /dev/null
+++ b/recipe/moe/moe_trainer/config/ppo_trainer.yaml
@@ -0,0 +1,276 @@
+data:
+  tokenizer: null
+  use_shm: False
+  train_files: ~/data/rlhf/gsm8k/train.parquet
+  val_files: ~/data/rlhf/gsm8k/test.parquet
+  prompt_key: prompt
+  reward_fn_key: data_source
+  max_prompt_length: 512
+  max_response_length: 512
+  train_batch_size: 1024
+  val_batch_size: null
+  return_raw_input_ids: False  # This should be set to true when the tokenizer between policy and rm differs
+  return_raw_chat: False
+  return_full_prompt: False
+  shuffle: True
+  filter_overlong_prompts: False # for large-scale datasets, filtering overlong prompts can be time-consuming. You can set filter_overlong_prompts_workers to use multiprocessing to speed it up.
+  filter_overlong_prompts_workers: 1
+  truncation: error
+  image_key: images
+  video_key: videos
+  trust_remote_code: False  # main_ppo will check this config to determine whether to use remote code for tokenizer
+  custom_cls:
+      path: null
+      name: null
+
+actor_rollout_ref:
+  hybrid_engine: True
+  model:
+    path: ~/models/deepseek-llm-7b-chat
+    use_shm: False
+    external_lib: null
+    override_config: { }
+    enable_gradient_checkpointing: True
+    enable_activation_offload: False
+    use_remove_padding: False
+    lora_rank: 0  # Set to positive value to enable LoRA (e.g., 32)
+    lora_alpha: 16  # LoRA scaling factor
+    target_modules: all-linear  # all-linear or [q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj]
+    use_liger: False
+    use_fused_kernels: False
+    trust_remote_code: False
+  actor:
+    strategy: fsdp  # [fsdp, fsdp2], This is for backward-compatibility
+    ppo_mini_batch_size: 256
+    ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
+    ppo_micro_batch_size_per_gpu: null
+    use_dynamic_bsz: False
+    ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
+    grad_clip: 1.0
+    # pg_losses2 = -advantages * torch.clamp(ratio, 1 - cliprange_low, 1 + cliprange_high)
+    clip_ratio: 0.2 # default value if clip_ratio_low and clip_ratio_high are not specified
+    clip_ratio_low: 0.2
+    clip_ratio_high: 0.2
+    clip_ratio_c: 3.0 # lower bound of the value for Dual-clip PPO from https://arxiv.org/pdf/1912.09729
+    loss_agg_mode: "token-mean" # / "seq-mean-token-sum" / "seq-mean-token-mean"
+    entropy_coeff: 0
+    use_kl_loss: False # True for GRPO
+    use_torch_compile: True # False to disable torch compile
+    kl_loss_coef: 0.001 # for grpo
+    kl_loss_type: low_var_kl # for grpo
+    ppo_epochs: 1
+    shuffle: False
+    ulysses_sequence_parallel_size: 1 # sp size
+    checkpoint:
+      contents: ['model', 'optimizer', 'extra']  # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
+    optim:
+      lr: 1e-6
+      lr_warmup_steps: -1 # Prioritized. Negative values mean delegating to lr_warmup_steps_ratio.
+      lr_warmup_steps_ratio: 0.  # the total steps will be injected during runtime
+      min_lr_ratio: 0.0   # only used with cosine lr scheduler, default to 0.0
+      num_cycles: 0.5     # only used with cosine lr scheduler, default to 0.5
+      warmup_style: constant  # select from constant/cosine
+      total_training_steps: -1  # must be overridden by the program
+      weight_decay: 0.01
+    fsdp_config:
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+      param_offload: False
+      optimizer_offload: False
+      offload_policy: False # only for fsdp2, offload param\grad\optimizer during train
+      reshard_after_forward: True # only for fsdp2, [True, False, int between 1 and fsdp_size]
+      fsdp_size: -1
+  ref:
+    strategy: fsdp
+    fsdp_config:
+      param_offload: False
+      reshard_after_forward: True # only for fsdp2, [True, False, int between 1 and fsdp_size]
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+    use_torch_compile: ${actor_rollout_ref.actor.use_torch_compile}
+    log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
+    log_prob_micro_batch_size_per_gpu: null
+    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
+    ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
+  rollout:
+    name: vllm
+    mode: sync # sync: LLM, async: AsyncLLM
+    chat_scheduler: null # async chat scheduler, e.g examples.ppo_trainer.naive_chat_scheduler.NaiveChatCompletionScheduler
+    temperature: 1.0
+    top_k: -1 # 0 for hf rollout, -1 for vllm rollout
+    top_p: 1
+    use_fire_sampling: False # https://arxiv.org/abs/2410.21236
+    prompt_length: ${data.max_prompt_length}  # not use for opensource
+    response_length: ${data.max_response_length}
+    # for vllm rollout
+    dtype: bfloat16 # should align with FSDP
+    gpu_memory_utilization: 0.5
+    ignore_eos: False
+    enforce_eager: True
+    free_cache_engine: True
+    load_format: dummy_dtensor  # safetensors (for huge model, and set use_shm=True); dummy_dtensor: randomly init model weight
+    layered_summon: False # for huge model, layered summon can save memory (prevent OOM) but make it slower
+    tensor_model_parallel_size: 2
+    max_num_batched_tokens: 8192
+    max_model_len: null
+    max_num_seqs: 1024
+    log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
+    log_prob_micro_batch_size_per_gpu: null
+    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
+    disable_log_stats: True
+    enable_chunked_prefill: True # may get higher throughput when set to True. When activated, Please increase max_num_batched_tokens or decrease max_model_len.
+    # for hf rollout
+    do_sample: True
+    # number of responses (i.e. num sample times)
+    n: 1 # > 1 for grpo
+    engine_kwargs: # inference engine parameters
+      vllm:
+        swap_space: null # null means "use the engine default value" (usually 4 GB), setting it to, e.g., 32 means 32 GB
+      sglang:
+        attention_backend: null # null means use the engine default value, available options: flashinfer, triton, flashmla
+    val_kwargs:
+      # sampling parameters for validation
+      top_k: -1 # 0 for hf rollout, -1 for vllm rollout
+      top_p: 1.0
+      temperature: 0
+      n: 1
+      do_sample: False # default eager for validation
+    multi_turn:
+      enable: False  # set to True for multi-turn tool interaction tasks; should set rollout.name to sglang as well
+      max_turns: null  # null for no limit (default max_length // 3)
+      tool_config_path: null  # null for no tool
+      format: chatml  # chatml, more formats will be supported in the future
+
+critic:
+  rollout_n: ${actor_rollout_ref.rollout.n}
+  strategy: fsdp # [fsdp, fsdp2]
+  optim:
+    lr: 1e-5
+    lr_warmup_steps_ratio: 0.  # the total steps will be injected during runtime
+    min_lr_ratio: null   # only useful for warmup with cosine
+    warmup_style: constant  # select from constant/cosine
+    total_training_steps: -1  # must be overridden by the program
+    weight_decay: 0.01
+  model:
+    path: ~/models/deepseek-llm-7b-chat
+    use_shm: False
+    tokenizer_path: ${actor_rollout_ref.model.path}
+    override_config: { }
+    external_lib: ${actor_rollout_ref.model.external_lib}
+    enable_gradient_checkpointing: True
+    enable_activation_offload: False
+    use_remove_padding: False
+    trust_remote_code: ${actor_rollout_ref.model.trust_remote_code}
+    fsdp_config:
+      param_offload: False
+      optimizer_offload: False
+      offload_policy: False # only for fsdp2, offload param\grad\optimizer during train
+      reshard_after_forward: True # only for fsdp2, [True, False, int between 1 and fsdp_size]
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+      fsdp_size: -1
+    lora_rank: 0  # Set to positive value to enable LoRA (e.g., 32)
+    lora_alpha: 16  # LoRA scaling factor
+    target_modules: all-linear  # all-linear or [q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj]
+  ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
+  ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
+  ppo_micro_batch_size_per_gpu: null
+  forward_micro_batch_size: ${critic.ppo_micro_batch_size}
+  forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
+  use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+  ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
+  forward_max_token_len_per_gpu: ${critic.ppo_max_token_len_per_gpu}
+  ulysses_sequence_parallel_size: 1 # sp size
+  ppo_epochs: ${actor_rollout_ref.actor.ppo_epochs}
+  shuffle: ${actor_rollout_ref.actor.shuffle}
+  grad_clip: 1.0
+  cliprange_value: 0.5
+  loss_agg_mode: ${actor_rollout_ref.actor.loss_agg_mode}
+  checkpoint:
+    contents: ['model', 'optimizer', 'extra']  # with 'hf_model' you can save whole model as hf format, now only use sharded model checkpoint to save space
+
+reward_model:
+  enable: False
+  strategy: fsdp
+  model:
+    input_tokenizer: ${actor_rollout_ref.model.path}  # set this to null if the chat template is identical
+    path: ~/models/FsfairX-LLaMA3-RM-v0.1
+    use_shm: False
+    external_lib: ${actor_rollout_ref.model.external_lib}
+    use_remove_padding: False
+    use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels}
+    trust_remote_code: False
+    fsdp_config:
+      wrap_policy:
+        min_num_params: 0
+      param_offload: False
+      reshard_after_forward: True # only for fsdp2, [True, False, int between 1 and fsdp_size]
+      fsdp_size: -1
+  micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
+  micro_batch_size_per_gpu: null # set a number
+  max_length: null
+  ulysses_sequence_parallel_size: 1 # sp size
+  use_dynamic_bsz: ${critic.use_dynamic_bsz}
+  forward_max_token_len_per_gpu: ${critic.forward_max_token_len_per_gpu}
+  reward_manager: naive
+  launch_reward_fn_async: False # custom reward function executed async on CPU, during log_prob
+  sandbox_fusion:
+    url: null # faas url to run code in cloud sandbox
+    max_concurrent: 64 # max concurrent requests to sandbox
+custom_reward_function:
+  path: null
+  name: compute_score
+
+algorithm:
+  gamma: 1.0
+  lam: 1.0
+  adv_estimator: gae
+  norm_adv_by_std_in_grpo: True
+  use_kl_in_reward: False
+  kl_penalty: kl  # how to estimate kl divergence
+  kl_ctrl:
+    type: fixed
+    kl_coef: 0.001
+    horizon: 10000
+    target_kl: 0.1
+  use_pf_ppo: False
+  pf_ppo:
+    reweight_method: pow  # ["pow", "max_min", "max_random"]
+    weight_pow: 2.0
+
+trainer:
+  balance_batch: True
+  total_epochs: 30
+  total_training_steps: null
+  project_name: verl_examples
+  experiment_name: gsm8k
+  logger: [ 'console', 'wandb' ]
+  log_val_generations: 0
+  rollout_data_dir: null # directory for logging the rollout data, no dump if null
+  validation_data_dir: null # directory for logging the validation data, no dump if null
+  nnodes: 1
+  n_gpus_per_node: 8
+  save_freq: -1
+  # auto: find the last ckpt to resume. If can't find, start from scratch
+  resume_mode: auto # or disable or resume_path if resume_from_path is set
+  resume_from_path: null
+  val_before_train: True
+  test_freq: -1
+  critic_warmup: 0
+  default_hdfs_dir: null
+  del_local_ckpt_after_load: False
+  default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}
+  max_actor_ckpt_to_keep: null
+  max_critic_ckpt_to_keep: null
+  # The timeout for ray worker group to wait for the register center to be ready
+  ray_wait_register_center_timeout: 300
+  device: cuda
+
+ray_init:
+  num_cpus: null # null (`None`) means using all CPUs, which might cause a hang on resource-limited systems like SLURM; in that case, set this to an allowed number.
+  timeline_json_file: null
diff --git a/recipe/moe/moe_trainer/config/sft_trainer.yaml b/recipe/moe/moe_trainer/config/sft_trainer.yaml
new file mode 100644
index 00000000000..e6ca86a5ffd
--- /dev/null
+++ b/recipe/moe/moe_trainer/config/sft_trainer.yaml
@@ -0,0 +1,65 @@
+data:
+  train_batch_size: 256
+  micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
+  micro_batch_size_per_gpu: 4  # this is also val batch size
+  train_files: ~/data/gsm8k/train.parquet
+  val_files: ~/data/gsm8k/test.parquet
+  # Single-turn settings
+  prompt_key: question
+  response_key: answer
+  prompt_dict_keys: ['question']
+  response_dict_keys: ['answer']
+  # Multi-turn settings
+  multiturn:
+    enable: false  # Set to true to use multi-turn dataset
+    messages_key: messages  # Key for messages list in multi-turn mode
+  max_length: 1024
+  truncation: error
+  balance_dp_token: False
+  chat_template: null
+  custom_cls:
+    path: null
+    name: null
+  use_shm: False
+model:
+  partial_pretrain: ~/models/gemma-1.1-7b-it
+  use_shm: False
+  fsdp_config:
+    model_dtype: fp32
+    wrap_policy:
+      min_num_params: 0
+    cpu_offload: False
+    offload_params: False
+  external_lib: null
+  enable_gradient_checkpointing: False
+  trust_remote_code: False
+  lora_rank: 0  # Set to positive value to enable LoRA (e.g., 32)
+  lora_alpha: 16  # LoRA scaling factor
+  target_modules: all-linear  # Target modules for LoRA adaptation
+  use_liger: False
+  strategy: fsdp2
+optim:
+  lr: 1e-5
+  betas: [0.9, 0.95]
+  weight_decay: 0.01
+  warmup_steps_ratio: 0.1
+  clip_grad: 1.0
+  lr_scheduler: cosine
+ulysses_sequence_parallel_size: 1
+use_remove_padding: False
+trainer:
+  default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}
+  default_hdfs_dir: null
+  resume_path: null
+  project_name: gsm8k-sft
+  experiment_name: test
+  total_epochs: 4
+  total_training_steps: null
+  logger: [ 'console', 'wandb' ]
+  seed: 1
+
+  save_freq: -1
+  test_freq: -1
+  nnodes: 1
+  n_gpus_per_node: 8
+  max_ckpt_to_keep: null # TODO
diff --git a/recipe/moe/moe_trainer/configuration_xdgmoe.py b/recipe/moe/moe_trainer/configuration_xdgmoe.py
new file mode 100644
index 00000000000..2c127297f84
--- /dev/null
+++ b/recipe/moe/moe_trainer/configuration_xdgmoe.py
@@ -0,0 +1,210 @@
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+logger = logging.get_logger(__name__)
+
+XdgMoE_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+class XdgMoEConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`XdgMoEModel`]. It is used to instantiate an XdgMoE
+    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+    defaults will yield a similar configuration to that of the XdgMoE-7B.
+
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+
+
+    Args:
+        vocab_size (`int`, *optional*, defaults to 102400):
+            Vocabulary size of the XdgMoE model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`XdgMoEModel`]
+        hidden_size (`int`, *optional*, defaults to 4096):
+            Dimension of the hidden representations.
+        intermediate_size (`int`, *optional*, defaults to 11008):
+            Dimension of the MLP representations.
+        moe_intermediate_size (`int`, *optional*, defaults to 1407):
+            Dimension of the MoE representations.
+        num_hidden_layers (`int`, *optional*, defaults to 30):
+            Number of hidden layers in the Transformer decoder.
+        num_attention_heads (`int`, *optional*, defaults to 32):
+            Number of attention heads for each attention layer in the Transformer decoder.
+        n_shared_experts (`int`, *optional*, defaults to None):
+            Number of shared experts, None means dense model.
+        n_routed_experts (`int`, *optional*, defaults to None):
+            Number of routed experts, None means dense model.
+        num_experts_per_tok (`int`, *optional*, defaults to None):
+            Number of selected experts, None means dense model.
+        moe_layer_freq (`int`, *optional*, defaults to 1):
+            The frequency of the MoE layer: one expert layer for every `moe_layer_freq - 1` dense layers.
+        first_k_dense_replace (`int`, *optional*, defaults to 0):
+            Number of dense layers in shallow layers(embed->dense->dense->...->dense->moe->moe...->lm_head).
+                                                            \--k dense layers--/
+        norm_topk_prob (`bool`, *optional*, defaults to False):
+            Whether to normalize the weights of the routed experts.
+        scoring_func (`str`, *optional*, defaults to 'softmax'):
+            Method of computing expert weights.
+        aux_loss_alpha (`float`, *optional*, defaults to 0.001):
+            Auxiliary loss weight coefficient.
+        seq_aux (`bool`, *optional*, defaults to True):
+            Whether to compute the auxiliary loss for each individual sample.
+        num_key_value_heads (`int`, *optional*, defaults to 32):
+            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
+            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
+            `num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
+            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
+            by meanpooling all the original heads within that group. For more details checkout [this
+            paper](https://arxiv.org/pdf/2305.13245.pdf). If `None` is passed, it falls back to
+            `num_attention_heads`.
+        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
+            The non-linear activation function (function or string) in the decoder.
+        max_position_embeddings (`int`, *optional*, defaults to 2048):
+            The maximum sequence length that this model might ever be used with.
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
+            The epsilon used by the rms normalization layers.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models). Only
+            relevant if `config.is_decoder=True`.
+        pad_token_id (`int`, *optional*):
+            Padding token id.
+        bos_token_id (`int`, *optional*, defaults to 151643):
+            Beginning of stream token id.
+        eos_token_id (`int`, *optional*, defaults to 151645):
+            End of stream token id.
+        pretraining_tp (`int`, *optional*, defaults to 1):
+            Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
+            document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value is
+            necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
+            issue](https://github.com/pytorch/pytorch/issues/76232).
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether to tie weight embeddings
+        rope_theta (`float`, *optional*, defaults to 10000.0):
+            The base period of the RoPE embeddings.
+        rope_scaling (`Dict`, *optional*):
+            Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
+            strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
+            `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
+            `max_position_embeddings` to the expected new maximum.
+        attention_bias (`bool`, *optional*, defaults to `False`):
+            Whether to use a bias in the query, key, value and output projection layers during self-attention.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+
+    ```python
+    >>> from transformers import XdgMoEModel, XdgMoEConfig
+
+    >>> # Initializing a XdgMoE XdgMoE-7b style configuration
+    >>> configuration = XdgMoEConfig()
+
+    >>> # Reading a value back from the configuration
+    >>> configuration.num_hidden_layers  # 30 with the default arguments
+    ```"""
+
+    model_type = "XdgMoE"
+    keys_to_ignore_at_inference = ["past_key_values"]
+
+    def __init__(
+        self,
+        vocab_size=102400,
+        hidden_size=4096,
+        intermediate_size=11008,
+        moe_intermediate_size = 1407,
+        num_hidden_layers=30,
+        num_attention_heads=32,
+        num_key_value_heads=32,
+        n_shared_experts = None,
+        n_routed_experts = None,
+        num_experts_per_tok = None,
+        moe_layer_freq = 1,
+        first_k_dense_replace = 0,
+        norm_topk_prob = False,
+        scoring_func = 'softmax',
+        aux_loss_alpha = 0.001,
+        seq_aux = True,
+        hidden_act="silu",
+        max_position_embeddings=2048,
+        initializer_range=0.02,
+        rms_norm_eps=1e-6,
+        use_cache=True,
+        pad_token_id=None,
+        bos_token_id=151643,
+        eos_token_id=151645,
+        pretraining_tp=1,
+        tie_word_embeddings=False,
+        rope_theta=10000.0,
+        rope_scaling=None,
+        attention_bias=False,
+        attention_dropout=0.0,
+        qk_layernorm=False,
+        moe_gating_fp32=False,
+        routed_scaling_factor=1.0,
+        head_dim=128,
+        router_expert_score_correction_coeff=0,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.moe_intermediate_size = moe_intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.n_shared_experts = n_shared_experts
+        self.n_routed_experts = n_routed_experts
+        self.num_experts_per_tok = num_experts_per_tok
+        self.moe_layer_freq = moe_layer_freq
+        self.first_k_dense_replace = first_k_dense_replace
+        self.norm_topk_prob = norm_topk_prob
+        self.scoring_func = scoring_func
+        self.aux_loss_alpha = aux_loss_alpha
+        self.seq_aux = seq_aux
+        # for backward compatibility: an explicit `None` falls back to one KV head per attention head
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.pretraining_tp = pretraining_tp
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self._rope_scaling_validation()  # raises ValueError if `rope_scaling` is malformed
+        self.attention_bias = attention_bias
+        self.attention_dropout = attention_dropout
+        self.qk_layernorm = qk_layernorm  # this and the four options below have no Args entry; each is stored as given
+        self.moe_gating_fp32 = moe_gating_fp32
+        self.routed_scaling_factor = routed_scaling_factor
+        self.head_dim = head_dim
+        self.router_expert_score_correction_coeff = router_expert_score_correction_coeff
+
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+    def _rope_scaling_validation(self):
+        """
+        Check `rope_scaling`: `None`, or a 2-entry dict with `type` in {linear, dynamic} and a `float` (not `int`) `factor` > 1; else raise `ValueError`.
+        """
+        if self.rope_scaling is None:
+            return
+
+        if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
+            raise ValueError(
+                "`rope_scaling` must be a dictionary with with two fields, `type` and `factor`, "
+                f"got {self.rope_scaling}"
+            )
+        rope_scaling_type = self.rope_scaling.get("type", None)
+        rope_scaling_factor = self.rope_scaling.get("factor", None)
+        if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
+            raise ValueError(
+                f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
+            )
+        if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
+            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
diff --git a/recipe/moe/moe_trainer/debug_data/sft.json b/recipe/moe/moe_trainer/debug_data/sft.json
new file mode 100644
index 00000000000..8179d5e915e
--- /dev/null
+++ b/recipe/moe/moe_trainer/debug_data/sft.json
@@ -0,0 +1 @@
+[{"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  
\n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}, {"answer": "- \\dfrac {1}{7}", "problem": "Given that a hyperbola shares common foci $F_1$ and $F_2$ with the ellipse $\\dfrac {x^{2}}{9}+ \\dfrac {y^{2}}{25}=1$, and their sum of eccentricities is $2 \\dfrac {4}{5}$.  \n$(1)$ Find the standard equation of the hyperbola;  \n$(2)$ Let $P$ be a point of intersection between the hyperbola and the ellipse, calculate $\\cos \\angle F_{1}PF_{2}$."}]
\ No newline at end of file
diff --git a/recipe/moe/moe_trainer/dump.py b/recipe/moe/moe_trainer/dump.py
new file mode 120000
index 00000000000..f0e8d85ffcf
--- /dev/null
+++ b/recipe/moe/moe_trainer/dump.py
@@ -0,0 +1 @@
+/cpfs/user/guangsu/verl-debug/dump.py
\ No newline at end of file
diff --git a/recipe/moe/moe_trainer/fsdp_sft_trainer.py b/recipe/moe/moe_trainer/fsdp_sft_trainer.py
new file mode 100644
index 00000000000..503871fbef1
--- /dev/null
+++ b/recipe/moe/moe_trainer/fsdp_sft_trainer.py
@@ -0,0 +1,771 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+A lightweight one-file FSDP SFT Trainer
+TODO(zhangchi.usc1992)
+- Add calculation of mfu
+- Add validation
+"""
+
+import os
+
+os.environ["NCCL_DEBUG"] = "WARN"
+os.environ["TOKENIZERS_PARALLELISM"] = "true"
+
+import logging
+import re
+from contextlib import nullcontext
+
+import pandas as pd
+import hydra
+import torch
+import torch.distributed
+# from peft import LoraConfig, TaskType, get_peft_model
+from tensordict import TensorDict
+from torch import nn, optim
+from torch.distributed.device_mesh import DeviceMesh, init_device_mesh
+from torch.distributed.fsdp import CPUOffload, MixedPrecision, ShardingStrategy
+from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
+from torch.utils.data import DataLoader, Dataset, DistributedSampler
+from tqdm import tqdm
+from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedModel
+
+import verl.utils.hdfs_io as hdfs_io
+from verl.utils.dataset import SFTDataset as OriginSFTDataset
+from verl.utils.dataset.multiturn_sft_dataset import MultiTurnSFTDataset as OriginMultiTurnSFTDataset
+from verl.utils.debug import log_gpu_memory_usage
+from verl.utils.device import get_device_name, get_torch_device, is_cuda_available, is_npu_available
+from verl.utils.distributed import destroy_global_process_group, initialize_global_process_group
+from verl.utils.fs import copy_to_local
+from verl.utils.fsdp_utils import (
+    CPUOffloadPolicy,
+    MixedPrecisionPolicy,
+    apply_fsdp2,
+    fsdp2_load_full_state_dict,
+    get_fsdp_wrap_policy,
+    get_init_weight_context_manager,
+    init_fn,
+    fsdp2_clip_grad_norm_
+)
+from verl.utils.torch_dtypes import PrecisionType
+from verl.utils.torch_functional import get_cosine_schedule_with_warmup, get_wsd_schedule_with_warmup
+from verl.utils.py_functional import convert_to_regular_types
+from verl.utils.tracking import Tracking
+from verl.utils.ulysses import (
+    gather_outpus_and_unpad,
+    get_ulysses_sequence_parallel_world_size,
+    ulysses_pad_and_slice_inputs,
+)
+from verl.workers.sharding_manager.fsdp_ulysses import FSDPUlyssesShardingManager
+
+if is_cuda_available:
+    from flash_attn.bert_padding import index_first_axis, pad_input, rearrange, unpad_input
+elif is_npu_available:
+    from transformers.integrations.npu_flash_attention import index_first_axis, pad_input, rearrange, unpad_input
+
+logger = logging.getLogger(__file__)
+logger.setLevel(os.getenv("VERL_SFT_LOGGING_LEVEL", "WARN"))
+
+
+def _zero_next_index(self):
+    return [0]
+
+
+# torch.utils.data.dataloader._BaseDataLoaderIter._next_index = _zero_next_index
+
+
+from megatron.core.datasets.blended_megatron_dataset_builder import (
+    BlendedMegatronDatasetBuilder,
+)
+from cybertron.data.datasets.gpt_dataset_ext import GPTDatasetExt
+from cybertron.data.datasets.gpt_dataset_ext import GPTDatasetExtConfig
+
+# from megatron.core.datasets.utils import get_blend_from_list
+
+from cybertron.tokenizer.tokenizer import HFTokenizer
+
+
+def build_dataset(tokenizer_path):
+
+    class HFGPTDataset(GPTDatasetExt):
+
+        def __getitem__(self, idx):
+            item = super().__getitem__(idx)
+            item["input_ids"] = item.pop("tokens")
+            return item
+
+    def build_gptdataset_config(tokenizer):
+        config_args = dict(
+            accurate_attn_mask_with_cp=False,
+            blend=None,
+            blend_per_split=[
+                (
+                    [
+                        "/cpfs/user/shimo/DATA/sft_data_pro/2_multi_session_megatron/qwen2_xdg_8k_sys_v7.2.1_stage1_v2.2_stage2_dev0.7_multi_session_prompt_document"
+                    ],
+                    [1.0],
+                ),
+                (["/cpfs/data/text_data/submit_v0/pile_test"], [1.0]),
+                (["/cpfs/data/text_data/submit_v0/pile_test"], [1.0]),
+            ],
+            create_attention_mask=False,
+            enable_length_upsample=False,
+            ensure_full_document=True,
+            eod_mask_loss=True,
+            length_upsample_data_root="/cpfs/user/guangsu/verl-debug/run_cybertron/save/length_upsample_data",
+            length_upsample_min_bucket_docs=1000,
+            length_upsample_times='{\\"0-32768\\":1,\\"32768-inf\\":5}',
+            mask_loss_id=160000,
+            mmap_bin_files=True,
+            padding_sequence_to_mul=1,
+            path_to_cache="/cpfs/user/guangsu/verl-debug/run_cybertron/save/moe_sft_145b_v7.2.1_stage1_v2.2_stage2_dev0.7_guangsu/data_cache",
+            random_seed=1234,
+            reset_attention_mask=False,
+            reset_position_ids=False,
+            sequence_length=8192,
+            split=None,
+            tokenizer=tokenizer,
+        )
+        return GPTDatasetExtConfig(**config_args)
+
+    tokenizer = HFTokenizer(tokenizer_path)
+    config = build_gptdataset_config(tokenizer)
+    HFGPTDataset.__name__ = "GPTDatasetExt"
+    dataset = BlendedMegatronDatasetBuilder(
+        HFGPTDataset, (8, 8000, 8000), lambda: True, config
+    ).build()[0]
+    return dataset
+
+
+def extract_step(path):
+    match = re.search(r"global_step_(\d+)", path)
+    if match:
+        return int(match.group(1))
+    return None
+
+class MultiTurnSFTDataset(OriginMultiTurnSFTDataset):
+    def _read_files_and_process(self):
+        def series_to_item(ls):
+            import numpy
+            import pandas
+
+            while isinstance(ls, (pandas.core.series.Series, numpy.ndarray)) and len(ls) == 1:
+                ls = ls[0]
+            return ls
+        # if os.getenv("DEBUG", None) != None:
+        #     if torch.distributed.is_initialized():
+        #         if torch.distributed.get_rank() == 0:
+        #             breakpoint()
+        #     else:
+        #         torch.distributed.barrier()
+        dataframes = []
+        for parquet_file in self.parquet_files:
+            if parquet_file.endswith('parquet'):
+                dataframe = pd.read_parquet(parquet_file)
+            elif parquet_file.endswith('json'):
+                dataframe = pd.read_json(parquet_file)
+            else:
+                raise
+            dataframes.append(dataframe)
+        self.dataframe = pd.concat(dataframes)
+        # if os.getenv("DEBUG", None) != None and torch.distributed.is_initialized() and torch.distributed.get_rank() == 0:    
+        #     torch.distributed.barrier()
+        # Extract messages list from dataframe
+        self.messages = self.dataframe[self.messages_key].apply(series_to_item).tolist()
+
+class SFTDataset(OriginSFTDataset):
+    def _read_files_and_tokenize(self):
+        def series_to_item(ls):
+            import numpy
+            import pandas
+
+            while isinstance(ls, (pandas.core.series.Series, numpy.ndarray)) and len(ls) == 1:
+                ls = ls[0]
+            return ls
+
+        dataframes = []
+        for parquet_file in self.parquet_files:
+            # read parquet files and cache
+            if parquet_file.endswith('parquet'):
+                dataframe = pd.read_parquet(parquet_file)
+            elif parquet_file.endswith('json'):
+                dataframe = pd.read_json(parquet_file)
+            else:
+                raise
+            dataframes.append(dataframe)
+        self.dataframe = pd.concat(dataframes)
+        self.prompts = self.dataframe[self.prompt_key[0]]
+        self.prompts = self.prompts.values
+        self.responses = self.dataframe[self.response_key[0]]
+        self.responses = self.responses.values
+
+class FSDPSFTTrainer:
+    def __init__(self, config, device_mesh: DeviceMesh, ulysses_device_mesh: DeviceMesh, tokenizer, train_dataset: Dataset, val_dataset: Dataset):
+        self.config = config
+        self.device_mesh = device_mesh
+        self.ulysses_device_mesh = ulysses_device_mesh
+        self.sharding_manager = FSDPUlyssesShardingManager(self.ulysses_device_mesh)
+        self.tokenizer = tokenizer
+        if self.config.data.chat_template is not None:
+            raise ValueError("Apply Chat template from config is not supported yet.")
+
+        # normalize dp size
+        self._normalize_config_bsz()
+
+        # Set sequence parallel size
+        self.config.ulysses_sequence_parallel_size = getattr(self.config, "ulysses_sequence_parallel_size", 1)
+        self.use_remove_padding = getattr(self.config, "use_remove_padding", False)
+        if self.device_mesh.get_rank() == 0:
+            print(f"Using sequence parallel size: {self.config.ulysses_sequence_parallel_size}")
+            print(f"Using remove padding: {self.use_remove_padding}")
+
+        self._build_dataloader(train_dataset, val_dataset)
+        # build model
+        self._build_model_optimizer()
+
+        # TODO: add checkpoint manager
+        if self.device_mesh.get_rank() == 0:
+            print(self.config)
+        self.device_name = get_device_name()
+        # torch.autograd.set_detect_anomaly(True)
+
+    def _normalize_config_bsz(self):
+        dp_size = self.device_mesh.size(0) if not self.ulysses_device_mesh else self.ulysses_device_mesh.size(0)
+        if self.device_mesh.get_rank() == 0:
+            print(f"Normalize batch size by dp {dp_size}")
+
+        assert self.config.data.train_batch_size % dp_size == 0, f"Global batch size {self.config.data.train_batch_size} is not divisible by dp size {dp_size}"
+
+        self.config.data.train_batch_size //= dp_size
+
+        assert self.config.data.train_batch_size % self.config.data.micro_batch_size_per_gpu == 0
+
+    def _build_dataloader(self, train_dataset, val_dataset):
+        # build dataset
+        config = self.config
+        self.train_dataset, self.val_dataset = train_dataset, val_dataset
+
+        # build dataloader
+        # Use data parallel rank and size instead of global rank and world size
+
+        # If doing SP, we need to use the local rank and size
+        if self.config.ulysses_sequence_parallel_size > 1:
+            rank = self.ulysses_device_mesh.get_local_rank("dp")
+            world_size = self.ulysses_device_mesh.size(0)
+            if self.ulysses_device_mesh.get_rank() == 0:
+                print(f"Using SP rank {rank} and size {world_size} for data distribution")
+                print("Each SP rank gets different data, but the same data WITHIN the same rank")
+        else:
+            rank = self.device_mesh.get_rank()
+            world_size = self.device_mesh.size()
+        if self.device_mesh.get_rank() == 0:
+            print(f"Using FSDP rank {rank} and size {world_size} for data distribution")
+
+        self.train_sampler = DistributedSampler(self.train_dataset, shuffle=True, num_replicas=world_size, rank=rank, drop_last=True)
+        self.train_dataloader = DataLoader(
+            dataset=self.train_dataset,
+            batch_size=config.data.train_batch_size,
+            sampler=self.train_sampler,
+            num_workers=8,
+            pin_memory=True,
+            drop_last=True,
+        )
+
+        self.val_sampler = DistributedSampler(self.val_dataset, shuffle=False, num_replicas=world_size, rank=rank, drop_last=True)
+        self.val_dataloader = DataLoader(
+            dataset=self.val_dataset,
+            batch_size=config.data.micro_batch_size_per_gpu,
+            sampler=self.val_sampler,
+            num_workers=8,
+            pin_memory=True,
+            drop_last=True,
+        )
+
+    def _build_model_optimizer(self):
+        # TODO (zhangchi.usc1992):
+        # 1. support pretrain from random weights
+        # 2. support init directly from sharded weights
+        local_model_path = copy_to_local(src=self.config.model.partial_pretrain, verbose=True)
+
+        if self.config.model.get("external_lib", None) is not None:
+            # This is used to import external_lib into the huggingface systems
+            import importlib
+
+            importlib.import_module(self.config.model.external_lib)
+
+        log_gpu_memory_usage("Before model allocation", logger=logger)
+
+        trust_remote_code = self.config.model.trust_remote_code
+        torch_dtype = self.config.model.fsdp_config.get("model_dtype", "fp32")
+        torch_dtype = PrecisionType.to_dtype(torch_dtype)
+        # load config first
+        config = AutoConfig.from_pretrained(local_model_path, trust_remote_code=trust_remote_code)
+        self.model_config = config
+        if self.config.ulysses_sequence_parallel_size > 1:
+            assert self.use_remove_padding, "Sequence parallel is only supported when remove_padding is enabled"
+
+        # This may be very large
+        init_context = get_init_weight_context_manager(use_meta_tensor=not config.tie_word_embeddings, mesh=self.device_mesh)
+
+        with init_context():
+            self.model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
+                local_model_path,
+                config=config,
+                torch_dtype=torch_dtype,
+                attn_implementation="flash_attention_2",
+                trust_remote_code=trust_remote_code,
+            )
+
+            if self.use_remove_padding or self.config.ulysses_sequence_parallel_size > 1:
+                from verl.models.transformers.monkey_patch import apply_monkey_patch
+
+                apply_monkey_patch(model=self.model, ulysses_sp_size=self.config.ulysses_sequence_parallel_size)
+
+            # Apply Liger kernel if use_liger is enabled
+            if self.config.model.get("use_liger", False):
+                from liger_kernel.transformers.monkey_patch import _apply_liger_kernel_to_instance
+
+                _apply_liger_kernel_to_instance(model=self.model)
+
+            if self.config.model.get("lora_rank", 0) > 0:
+                self.model.enable_input_require_grads()
+                # Convert config to regular Python types before creating PEFT model
+                lora_config = {
+                    "task_type": TaskType.CAUSAL_LM,
+                    "r": self.config.model.lora_rank,
+                    "lora_alpha": self.config.model.lora_alpha,
+                    "target_modules": convert_to_regular_types(self.config.model.target_modules),
+                    "bias": "none",
+                }
+                self.model = get_peft_model(self.model, LoraConfig(**lora_config))
+
+        if self.config.model.enable_gradient_checkpointing:
+            self.model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False})
+
+        log_gpu_memory_usage("After model allocation", logger=logger)
+
+        mixed_precision = MixedPrecision(
+            param_dtype=torch.bfloat16,
+            reduce_dtype=torch.bfloat16,
+            buffer_dtype=torch.bfloat16,
+        )
+
+        auto_wrap_policy = get_fsdp_wrap_policy(
+            self.model,
+            config=self.config.model.fsdp_config.wrap_policy,
+            is_lora=self.config.model.get("lora_rank", 0) > 0,
+        )
+        if self.device_mesh.get_rank() == 0:
+            print(auto_wrap_policy)
+
+        if not self.config.model.fsdp_config.cpu_offload:
+            cpu_offload = None
+        else:
+            cpu_offload = CPUOffload(offload_params=self.config.model.fsdp_config.offload_params)
+        if os.getenv("BIT_DUMP", None) != None:
+            from moe_trainer.bitdump import hook_fwd_bwd_to_module
+
+            # names = ["decoder.layers.0*", "decoder.layers.1*"]
+            names = None
+            dump_path = os.getenv("BIT_DUMP", None)
+            hook_fwd_bwd_to_module(self.model, names=names, prefix=f"{dump_path}/")
+        fsdp_strategy = self.config.model.strategy
+        print(f"Using FSDP strategy: {fsdp_strategy}")
+        if fsdp_strategy == "fsdp":
+            self.fsdp_model = FSDP(
+                self.model,
+                cpu_offload=cpu_offload,
+                param_init_fn=init_fn,
+                use_orig_params=False,
+                auto_wrap_policy=auto_wrap_policy,
+                device_id=get_torch_device().current_device(),
+                sharding_strategy=ShardingStrategy.FULL_SHARD,
+                mixed_precision=mixed_precision,
+                sync_module_states=True,
+                device_mesh=self.device_mesh,
+                forward_prefetch=False,
+            )
+
+            # print(self.fsdp_model)
+            # from .dump import hook_fwd_bwd_to_module
+
+            # names = "_fsdp_wrapped_module.lm_head"
+            names = ["_fsdp_wrapped_module.model.layers.0.*"]
+            # hook_fwd_bwd_to_module(
+            #     self.fsdp_model,
+            #     names=names,
+            #     prefix="/cpfs/user/guangsu/verl-debug/dump_data/verl/",
+            #     is_hf=True,
+            # )
+        elif fsdp_strategy == "fsdp2":
+            assert CPUOffloadPolicy is not None, "PyTorch version >= 2.4 is required for using fully_shard API (FSDP2)"
+            mp_policy = MixedPrecisionPolicy(param_dtype=torch.bfloat16, reduce_dtype=torch.float32,
+                                             cast_forward_inputs=True)
+
+            fsdp_kwargs = {
+                "mesh": self.device_mesh,
+                "mp_policy": mp_policy,
+                "offload_policy": cpu_offload,
+                "reshard_after_forward": True,
+            }
+            full_state = self.model.state_dict()
+            apply_fsdp2(self.model, fsdp_kwargs, self.config.model.fsdp_config)
+            fsdp2_load_full_state_dict(self.model, full_state, self.device_mesh, cpu_offload)
+            self.fsdp_model = self.model
+        else:
+            raise NotImplementedError(f"not implement {fsdp_strategy}")
+
+        log_gpu_memory_usage("After FSDP wrapping", logger=logger)
+
+        self.optimizer = optim.AdamW(
+            self.fsdp_model.parameters(),
+            lr=self.config.optim.lr,
+            betas=self.config.optim.betas,
+            weight_decay=self.config.optim.weight_decay,
+        )
+
+        log_gpu_memory_usage("After initialize optimizer", logger=logger)
+
+        self.steps_per_epoch = len(self.train_dataloader)
+        self.total_steps = self.steps_per_epoch * self.config.trainer.total_epochs
+
+        if self.device_mesh.get_rank() == 0:
+            print(f"Number of steps/epoch {self.steps_per_epoch}, number of epochs {self.config.trainer.total_epochs}, total number of steps {self.total_steps}")
+
+        num_warmup_steps = int(self.total_steps * self.config.optim.warmup_steps_ratio)
+
+        if not hasattr(self.config.optim, "lr_scheduler") or self.config.optim.lr_scheduler == "cosine":
+            self.lr_scheduler = get_cosine_schedule_with_warmup(optimizer=self.optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=self.total_steps)
+        elif self.config.optim.lr_scheduler == "wsd":
+            self.lr_scheduler = get_wsd_schedule_with_warmup(optimizer=self.optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=self.total_steps)
+        else:
+            raise ValueError(f"Unknown lr scheduler: {self.config.optim.lr_scheduler}")
+
+    def _compute_loss_and_backward(self, batch, do_backward=True):
+        """Compute loss with optional sequence parallelism and remove padding features"""
+        use_sp = self.use_remove_padding and self.config.ulysses_sequence_parallel_size > 1
+
+        # Move inputs to GPU and prepare loss mask
+        input_ids = batch["input_ids"].to(self.device_name)
+        attention_mask = batch["attention_mask"].to(self.device_name)
+        position_ids = batch["position_ids"].to(self.device_name)
+        # labels = batch["labels"].to(self.device_name)
+        loss_mask = batch.pop("loss_mask")[:, :-1].reshape(-1).to(self.device_name)
+        loss_fct = nn.CrossEntropyLoss(reduction="none")
+
+        # Context manager for sequence parallel if needed
+        context = self.sharding_manager if use_sp else nullcontext()
+        with context, torch.autocast(device_type=self.device_name, dtype=torch.bfloat16):
+            if not use_sp:
+                # Standard forward pass without sequence parallel
+                labels = input_ids[:, 1:].contiguous()
+                output = self.fsdp_model(input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids, use_cache=False)
+                logits = output.logits
+                
+                shift_logits = logits[..., :-1, :].contiguous()
+                shift_labels = labels.contiguous()
+                # shift_logits = logits.contiguous()
+                # shift_labels = labels.contiguous()
+                # Flatten the tokens
+                shift_logits = shift_logits.view(-1, self.model.config.vocab_size)
+                shift_labels = shift_labels.view(-1)
+                # Enable model parallelism
+                shift_labels = shift_labels.to(shift_logits.device)
+                loss = loss_fct(shift_logits, shift_labels)
+                
+                # if torch.distributed.get_rank() == 0:
+                #     torch.save(loss, "/cpfs/user/guangsu/verl-debug/dump_data/verl/losses.pt")
+                
+                loss = loss * loss_mask.to(loss.device)
+            else:
+                # IMPORTANT: We have a big assumption here, so we can shard the SAME sequence across SP ranks
+                # i.e., each GPU has <1 sequence, and each SP group has 1 sequence
+                # 1. All SP ranks will receive the *SAME* batch
+                # 2. Different SP groups will receive *DIFFERENT* batches
+                # This is implemented by the DistributedSampler
+
+                batch_size, seqlen = input_ids.shape
+                # Remove padding
+                input_ids_rmpad, indices, *_ = unpad_input(input_ids.unsqueeze(-1), attention_mask)  # input_ids_rmpad (total_nnz, ...)
+                input_ids_rmpad = input_ids_rmpad.transpose(0, 1)  # (1, total_nnz)
+
+                # Unpad position_ids to align rotary
+                position_ids_rmpad = index_first_axis(rearrange(position_ids.unsqueeze(-1), "b s ... -> (b s) ..."), indices).transpose(0, 1)
+
+                # Pad and slice inputs for sequence parallelism
+                input_ids_rmpad_sliced, position_ids_rmpad_padded, pad_size = ulysses_pad_and_slice_inputs(input_ids_rmpad, position_ids_rmpad, sp_size=get_ulysses_sequence_parallel_world_size())
+                # For computing loss
+                input_ids_rmpad_rolled = torch.roll(input_ids_rmpad, shifts=-1, dims=1)  # (1, total_nnz)
+                input_ids_rmpad_rolled, _, _ = ulysses_pad_and_slice_inputs(input_ids_rmpad_rolled, None, get_ulysses_sequence_parallel_world_size())
+                input_ids_rmpad_rolled = input_ids_rmpad_rolled.squeeze(0)  # ((total_nnz / sp) + pad)
+
+                # Forward pass
+                output = self.fsdp_model(
+                    input_ids=input_ids_rmpad_sliced,
+                    attention_mask=None,  # Not needed with flash attention varlen
+                    position_ids=position_ids_rmpad_padded,
+                    use_cache=False,
+                )
+
+                # Compute loss locally then aggregate
+                logits_rmpad = output.logits.squeeze(0)
+                input_ids_rmpad_rolled = input_ids_rmpad_rolled.to(logits_rmpad.device)
+                loss = loss_fct(logits_rmpad, input_ids_rmpad_rolled)
+                # Gather and unpad for sequence parallelism
+                loss = gather_outpus_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
+
+                # This is the loss collected from all ulysses ranks
+                full_loss = pad_input(hidden_states=loss.unsqueeze(-1), indices=indices, batch=batch_size, seqlen=seqlen)
+                full_loss = full_loss.squeeze(-1)[:, :-1]  # Remove last token's loss
+                full_loss = full_loss.reshape(-1)
+                loss_mask = loss_mask.to(full_loss.device)
+                loss = full_loss * loss_mask
+
+            valid_token_this_rank = torch.sum(loss_mask)
+
+            if self.config.data.balance_dp_token:
+                torch.distributed.all_reduce(valid_token_this_rank)
+                dp_size = self.ulysses_device_mesh.size("dp") if use_sp else torch.distributed.get_world_size()
+            else:
+                dp_size = 1
+
+            loss = torch.sum(loss) / (valid_token_this_rank + 1e-8) * dp_size
+            
+            if do_backward:
+                loss.backward()
+            return loss
+
+    def training_step(self, batch: TensorDict):
+        self.fsdp_model.train()
+
+        log_gpu_memory_usage("Before optimizer zero_grad", logger=logger)
+
+        self.optimizer.zero_grad()
+
+        log_gpu_memory_usage("After optimizer zero_grad", logger=logger)
+
+        micro_batches = batch.split(self.config.data.micro_batch_size_per_gpu)
+        n_micro_batches = len(micro_batches)
+        step_loss = 0
+        for micro_batch in micro_batches:
+            loss = self._compute_loss_and_backward(batch=micro_batch) / n_micro_batches
+            step_loss += loss.item()
+
+        if self.config.model.strategy == 'fsdp':
+            grad_norm = self.fsdp_model.clip_grad_norm_(max_norm=self.config.optim.clip_grad)
+        elif self.config.model.strategy == 'fsdp2':
+            grad_norm = fsdp2_clip_grad_norm_(self.fsdp_model.parameters(), max_norm=self.config.optim.clip_grad)
+        else:
+            raise NotImplementedError(f"not implement {self.config.model.strategy}")
+
+        log_gpu_memory_usage("Before optimizer step", logger=logger)
+
+        # if grad_norm is not finite, skip the update
+        if not torch.isfinite(grad_norm):
+            print(f"WARN: grad_norm is not finite: {grad_norm}")
+            self.optimizer.zero_grad()
+        else:
+            self.optimizer.step()
+
+        log_gpu_memory_usage("After optimizer step", logger=logger)
+
+        self.lr_scheduler.step()
+
+        # reduce loss across dp ranks
+        lr = self.lr_scheduler.get_last_lr()[0]
+
+        log_gpu_memory_usage("After offload weights", logger=logger)
+
+        step_loss = torch.tensor(step_loss).to(self.device_name)
+        if is_cuda_available:
+            torch.distributed.all_reduce(step_loss, op=torch.distributed.ReduceOp.AVG)
+        elif is_npu_available:
+            torch.distributed.all_reduce(step_loss)
+            step_loss /= self.ulysses_device_mesh.size(0)
+        return {"train/loss": step_loss.detach().item(), "train/lr(1e-3)": lr * 1e3}
+
+    def validation_step(self, batch: TensorDict):
+        self.fsdp_model.eval()
+        with torch.no_grad():
+            loss = self._compute_loss_and_backward(batch, do_backward=False)
+            if is_cuda_available:
+                torch.distributed.all_reduce(loss, op=torch.distributed.ReduceOp.AVG)
+            elif is_npu_available:
+                torch.distributed.all_reduce(loss)
+                loss /= self.ulysses_device_mesh.size(0)
+        return loss
+
+    def save_checkpoint(self, step):
+        # save checkpoint
+        path = os.path.join(self.config.trainer.default_local_dir, f"global_step_{step}")
+
+        fsdp_strategy = self.config.model.strategy
+        if fsdp_strategy == "fsdp":
+            # FSDP1 checkpoint saving
+            from torch.distributed.fsdp import FullStateDictConfig, StateDictType
+
+            cfg = FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
+            with FSDP.state_dict_type(self.fsdp_model, StateDictType.FULL_STATE_DICT, cfg):
+                state_dict = self.fsdp_model.state_dict()
+
+            # save huggingface model
+            if self.device_mesh.get_rank() == 0:
+                os.makedirs(path, exist_ok=True)
+                self.model.save_pretrained(path, state_dict=state_dict)
+                self.tokenizer.save_pretrained(path)
+        elif fsdp_strategy == "fsdp2":
+            # FSDP2 checkpoint saving
+            from torch.distributed.checkpoint.state_dict import StateDictOptions, get_model_state_dict
+
+            # Get full state dict with FSDP2
+            options = StateDictOptions(full_state_dict=True, cpu_offload=True)
+            state_dict = get_model_state_dict(self.fsdp_model, options=options)
+
+            # save huggingface model
+            if self.device_mesh.get_rank() == 0:
+                os.makedirs(path, exist_ok=True)
+                self.model.save_pretrained(path, state_dict=state_dict)
+                self.model_config.save_pretrained(path)
+                self.tokenizer.save_pretrained(path)
+        else:
+            raise NotImplementedError(f"not implement {fsdp_strategy}")
+
+        # Copy to HDFS if configured
+        if self.device_mesh.get_rank() == 0 and self.config.trainer.default_hdfs_dir:
+            hdfs_io.makedirs(self.config.trainer.default_hdfs_dir, exist_ok=True)
+            hdfs_io.copy(src=path, dst=self.config.trainer.default_hdfs_dir, dirs_exist_ok=True)
+
+        torch.distributed.barrier()
+
+    def fit(self):
+        rank = self.device_mesh.get_rank()
+
+        # TODO: add a unified tracking
+        if rank == 0:
+            tracking = Tracking(
+                project_name=self.config.trainer.project_name,
+                experiment_name=self.config.trainer.experiment_name,
+                default_backend=self.config.trainer.logger,
+            )
+
+        global_step = 0
+        last_valid_metric = None
+        # compute the total training steps.
+        # the total training steps in SFT is mainly for early exit
+        total_training_steps = len(self.train_dataloader) * self.config.trainer.total_epochs
+
+        if self.config.trainer.total_training_steps is not None:
+            total_training_steps = self.config.trainer.total_training_steps
+
+        self.total_training_steps = total_training_steps
+        print(f"Total training steps: {self.total_training_steps}")
+
+        # TODO (zhangchi.usc1992) add back checkpoint manager.
+        # Currently, it blocks when uploading to hdfs. So very slow.
+
+        for epoch in range(self.config.trainer.total_epochs):
+            self.train_sampler.set_epoch(epoch=epoch)
+            for data in tqdm(
+                self.train_dataloader,
+                total=self.steps_per_epoch,
+                desc=f"Epoch {epoch + 1}/{self.config.trainer.total_epochs}",
+                disable=rank != 0,
+            ):
+                global_step += 1
+                data = TensorDict(data, batch_size=self.config.data.train_batch_size).to(self.device_name)
+                metric = self.training_step(data)
+                if rank == 0:
+                    tracking.log(data=metric, step=global_step)
+
+                is_last_step = global_step >= self.total_training_steps
+                is_valid_step = global_step % self.config.trainer.test_freq == 0
+                is_save_step = global_step % self.config.trainer.save_freq == 0
+
+                # early exit or validation step
+                # if is_last_step or (
+                #     self.config.trainer.test_freq > 0 and is_valid_step
+                # ):
+                if False:
+                    # Perform validation
+                    val_losses = []
+                    for val_data in self.val_dataloader:
+                        val_data = TensorDict(val_data, batch_size=self.config.data.micro_batch_size_per_gpu).to(self.device_name)
+                        val_loss = self.validation_step(val_data)
+                        val_losses.append(val_loss)
+                    if rank == 0:
+                        val_loss = torch.mean(torch.stack(val_losses))
+                        metric = {"val/loss": val_loss.detach().item()}
+                        tracking.log(data=metric, step=global_step)
+                        last_valid_metric = metric
+                    torch.distributed.barrier()
+
+                if is_last_step or (self.config.trainer.save_freq > 0 and is_save_step):
+                    self.save_checkpoint(step=global_step)
+
+                if is_last_step:
+                    if rank == 0:
+                        print(f"Final validation metrics: {last_valid_metric}")
+                    return
+
+
+def run_sft(config):
+    device_name = get_device_name()
+    local_rank, rank, world_size = initialize_global_process_group()
+
+    device_mesh = init_device_mesh(device_type=device_name, mesh_shape=(world_size,), mesh_dim_names=("fsdp",))
+    dp_size = world_size // config.ulysses_sequence_parallel_size
+    ulysses_device_mesh = init_device_mesh(device_type=device_name, mesh_shape=(dp_size, config.ulysses_sequence_parallel_size), mesh_dim_names=("dp", "sp"))
+    # build tokenizer and datasets first
+    from verl.utils import hf_tokenizer
+
+    local_model_path = copy_to_local(src=config.model.partial_pretrain, verbose=True)
+    tokenizer = hf_tokenizer(local_model_path, trust_remote_code=config.model.trust_remote_code)
+    train_dataset = create_sft_dataset(config.data.train_files, config.data, tokenizer)
+    # train_dataset = build_dataset(tokenizer_path=local_model_path)
+    val_dataset = create_sft_dataset(config.data.val_files, config.data, tokenizer)
+
+    trainer = FSDPSFTTrainer(config=config, device_mesh=device_mesh, ulysses_device_mesh=ulysses_device_mesh, tokenizer=tokenizer, train_dataset=train_dataset, val_dataset=val_dataset)
+
+    trainer.fit()
+
+    destroy_global_process_group()
+
+
+@hydra.main(config_path="config", config_name="sft_trainer", version_base=None)
+def main(config):
+    run_sft(config)
+
+
+def create_sft_dataset(data_paths, data_config, tokenizer):
+    """Create a dataset."""
+    # build dataset
+    # First check if a custom dataset class is specified
+    if data_config.custom_cls.get("path", None):
+        from verl.utils.import_utils import load_extern_type
+
+        dataset_cls = load_extern_type(data_config.custom_cls.path, data_config.custom_cls.name)
+    # Then check if multi-turn dataset should be used
+    elif data_config.get("multiturn", {}).get("enable", False):
+        dataset_cls = MultiTurnSFTDataset
+    # Default to single-turn dataset
+    else:
+        dataset_cls = SFTDataset
+
+    # Create datasets based on the selected class
+    dataset = dataset_cls(parquet_files=data_paths, tokenizer=tokenizer, config=data_config)
+    return dataset
+
+
+if __name__ == "__main__":
+    main()
diff --git a/recipe/moe/moe_trainer/main_ppo.py b/recipe/moe/moe_trainer/main_ppo.py
new file mode 100644
index 00000000000..3fb88adc1f2
--- /dev/null
+++ b/recipe/moe/moe_trainer/main_ppo.py
@@ -0,0 +1,222 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Note that we don't combine this main with ray_trainer, as ray_trainer is also used by other main scripts.
+"""
+
+import hydra
+import ray
+
+from verl.trainer.ppo.ray_trainer import RayPPOTrainer
+from verl.trainer.ppo.reward import load_reward_manager
+
+
+@hydra.main(config_path="config", config_name="ppo_trainer", version_base=None)
+def main(config):  # Hydra entry point: receives the composed `ppo_trainer` config and hands it to run_ppo.
+    run_ppo(config)
+
+
+def run_ppo(config) -> None:
+    """Start Ray if needed, run PPO training in a remote ``TaskRunner``, then optionally dump a Ray timeline."""
+    if not ray.is_initialized():
+        # this is for local ray cluster: no Ray runtime is attached yet, so start one with the worker env preset
+        worker_env = {"TOKENIZERS_PARALLELISM": "true", "NCCL_DEBUG": "WARN", "VLLM_LOGGING_LEVEL": "WARN", "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "true"}
+        ray.init(runtime_env={"env_vars": worker_env}, num_cpus=config.ray_init.num_cpus)
+
+    # Block until the remote training task has finished.
+    task_runner = TaskRunner.remote()
+    ray.get(task_runner.run.remote(config))
+    # create a timeline trace file to analyze the performance (only when a target file is configured)
+    trace_path = config.ray_init.get("timeline_json_file", None)
+    if trace_path:
+        ray.timeline(filename=trace_path)
+
+
+@ray.remote(num_cpus=1)  # please make sure main_task is not scheduled on head
+class TaskRunner:
+    def run(self, config):
+        """Print and resolve the config, pick worker/dataset classes, then build a RayPPOTrainer and call init_workers() and fit()."""
+        from pprint import pprint
+
+        from omegaconf import OmegaConf
+
+        from verl.utils.fs import copy_to_local
+
+        pprint(OmegaConf.to_container(config, resolve=True))  # resolve=True will eval symbol values
+        OmegaConf.resolve(config)
+
+        # fetch the actor model checkpoint to a local path (e.g. download from hdfs); `use_shm` is passed through when configured
+        local_path = copy_to_local(config.actor_rollout_ref.model.path, use_shm=config.actor_rollout_ref.model.get("use_shm", False))
+
+        # instantiate tokenizer and processor from the local checkpoint
+        from verl.utils import hf_processor, hf_tokenizer
+
+        trust_remote_code = config.data.get("trust_remote_code", False)
+        tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code)
+        processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True)  # used for multimodal LLM, could be none
+
+        # vllm early verify: fail fast when LoRA (lora_rank > 0) is requested with a vllm older than 0.7.3
+        if config.actor_rollout_ref.rollout.name in ["vllm"]:
+            from verl.utils.vllm_utils import is_version_ge
+
+            if config.actor_rollout_ref.model.get("lora_rank", 0) > 0:
+                if not is_version_ge(pkg="vllm", minver="0.7.3"):
+                    raise NotImplementedError("PPO LoRA is not supported before vllm 0.7.3")
+
+        # define worker classes and the Ray worker-group class for the configured actor strategy
+        if config.actor_rollout_ref.actor.strategy in ["fsdp", "fsdp2"]:
+            assert config.critic.strategy in ["fsdp", "fsdp2"]
+            from verl.single_controller.ray import RayWorkerGroup
+            from verl.workers.fsdp_workers import ActorRolloutRefWorker, AsyncActorRolloutRefWorker, CriticWorker
+
+            actor_rollout_cls = AsyncActorRolloutRefWorker if config.actor_rollout_ref.rollout.mode == "async" else ActorRolloutRefWorker
+            ray_worker_group_cls = RayWorkerGroup
+
+        elif config.actor_rollout_ref.actor.strategy == "megatron":
+            assert config.actor_rollout_ref.actor.strategy == config.critic.strategy
+            from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup
+            from verl.workers.megatron_workers import ActorRolloutRefWorker, AsyncActorRolloutRefWorker, CriticWorker
+
+            actor_rollout_cls = AsyncActorRolloutRefWorker if config.actor_rollout_ref.rollout.mode == "async" else ActorRolloutRefWorker
+            ray_worker_group_cls = NVMegatronRayWorkerGroup
+
+        else:
+            raise NotImplementedError
+
+        from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role
+
+        role_worker_mapping = {
+            Role.ActorRollout: ray.remote(actor_rollout_cls),
+            Role.Critic: ray.remote(CriticWorker),
+        }
+
+        global_pool_id = "global_pool"
+        resource_pool_spec = {
+            global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes,
+        }
+        mapping = {
+            Role.ActorRollout: global_pool_id,
+            Role.Critic: global_pool_id,
+        }
+
+        # we should adopt a multi-source reward function here (this block only registers the model-based worker)
+        # - for rule-based rm, we directly call a reward score
+        # - for model-based rm, we call a model
+        # - for code related prompt, we send to a sandbox if there are test cases
+        # - finally, we combine all the rewards together
+        # - The reward type depends on the tag of the data
+        if config.reward_model.enable:
+            if config.reward_model.strategy in ["fsdp", "fsdp2"]:
+                from verl.workers.fsdp_workers import RewardModelWorker
+            elif config.reward_model.strategy == "megatron":
+                from verl.workers.megatron_workers import RewardModelWorker
+            else:
+                raise NotImplementedError
+            role_worker_mapping[Role.RewardModel] = ray.remote(RewardModelWorker)
+            mapping[Role.RewardModel] = global_pool_id
+
+        # register a reference-policy worker only when KL is used in the reward or in the actor loss
+        if config.algorithm.use_kl_in_reward or config.actor_rollout_ref.actor.use_kl_loss:
+            role_worker_mapping[Role.RefPolicy] = ray.remote(ActorRolloutRefWorker)
+            mapping[Role.RefPolicy] = global_pool_id
+
+        reward_fn = load_reward_manager(config, tokenizer, num_examine=0, **config.reward_model.get("reward_kwargs", {}))
+        val_reward_fn = load_reward_manager(config, tokenizer, num_examine=1, **config.reward_model.get("reward_kwargs", {}))
+        resource_pool_manager = ResourcePoolManager(resource_pool_spec=resource_pool_spec, mapping=mapping)
+
+        from verl.utils.dataset.rl_dataset import collate_fn
+
+        train_dataset = create_rl_dataset(config.data.train_files, config.data, tokenizer, processor)
+        val_dataset = create_rl_dataset(config.data.val_files, config.data, tokenizer, processor)
+        train_sampler = create_rl_sampler(config.data, train_dataset)
+        trainer = RayPPOTrainer(
+            config=config,
+            tokenizer=tokenizer,
+            processor=processor,
+            role_worker_mapping=role_worker_mapping,
+            resource_pool_manager=resource_pool_manager,
+            ray_worker_group_cls=ray_worker_group_cls,
+            reward_fn=reward_fn,
+            val_reward_fn=val_reward_fn,
+            train_dataset=train_dataset,
+            val_dataset=val_dataset,
+            collate_fn=collate_fn,
+            train_sampler=train_sampler,
+            device_name=config.trainer.device,
+        )
+        trainer.init_workers()
+        trainer.fit()
+
+
+def create_rl_dataset(data_paths, data_config, tokenizer, processor):
+    """Instantiate the RL dataset described by ``data_config``.
+
+    Arguments:
+        data_paths: The data file path(s), forwarded as ``data_files``.
+        data_config: The data config; may name a custom class via ``custom_cls``.
+        tokenizer (Tokenizer): The tokenizer.
+        processor (Processor): The processor.
+
+    Returns:
+        dataset (Dataset): The dataset.
+
+    Raises:
+        TypeError: If the custom class is not a ``torch.utils.data.Dataset`` subclass.
+    """
+    from torch.utils.data import Dataset
+
+    from verl.utils.dataset.rl_dataset import RLHFDataset
+
+    wants_custom = "custom_cls" in data_config and data_config.custom_cls.get("path", None) is not None
+    if not wants_custom:
+        dataset_cls = RLHFDataset
+    else:
+        from verl.utils.import_utils import load_extern_type
+
+        custom = data_config.custom_cls
+        dataset_cls = load_extern_type(custom.path, custom.name)
+        if not issubclass(dataset_cls, Dataset):
+            raise TypeError(f"The custom dataset class '{data_config.custom_cls.name}' from '{data_config.custom_cls.path}' must inherit from torch.utils.data.Dataset")
+    print(f"Using dataset class: {dataset_cls.__name__}")
+
+    # Every dataset class is constructed through the same keyword interface.
+    return dataset_cls(data_files=data_paths, tokenizer=tokenizer, processor=processor, config=data_config)
+
+
+def create_rl_sampler(data_config, dataset):
+    """Pick the sampler that determines the iteration order over ``dataset``.
+
+    Arguments:
+        data_config: The data config; ``shuffle`` selects random vs. sequential
+            order and ``seed`` (default 1) seeds the random order.
+        dataset (Dataset): The dataset.
+
+    Returns:
+        sampler (Sampler): The sampler.
+    """
+    import torch
+    from torch.utils.data import RandomSampler, SequentialSampler
+
+    # use sampler for better ckpt resume
+    if not data_config.shuffle:
+        return SequentialSampler(data_source=dataset)
+
+    # A dedicated, explicitly seeded generator drives the shuffle order.
+    shuffle_rng = torch.Generator()
+    shuffle_rng.manual_seed(data_config.get("seed", 1))
+    return RandomSampler(data_source=dataset, generator=shuffle_rng)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/recipe/moe/moe_trainer/modeling_xdgmoe.py b/recipe/moe/moe_trainer/modeling_xdgmoe.py
new file mode 100644
index 00000000000..71c8bae39db
--- /dev/null
+++ b/recipe/moe/moe_trainer/modeling_xdgmoe.py
@@ -0,0 +1,1721 @@
+# coding=utf-8
+# Copyright 2023 XdgMoE-AI and The HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" PyTorch XdgMoE model."""
+print('--------------Shared Modeling---------------')  # import-time banner (kept); placed after the docstring so the module's `__doc__` is set
+import math
+import warnings
+from typing import List, Optional, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+import torch.utils.checkpoint
+from torch import nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+
+from transformers.activations import ACT2FN
+from transformers.cache_utils import Cache, DynamicCache
+from transformers.modeling_attn_mask_utils import (
+    AttentionMaskConverter,
+    _prepare_4d_attention_mask,
+    _prepare_4d_causal_attention_mask,
+    _prepare_4d_causal_attention_mask_for_sdpa,
+)
+from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
+from transformers.modeling_utils import PreTrainedModel
+from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13
+from transformers.utils import (
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    is_flash_attn_2_available,
+    is_flash_attn_greater_or_equal_2_10,
+    logging,
+    replace_return_docstrings,
+)
+from transformers.utils.import_utils import is_torch_fx_available
+from .configuration_xdgmoe import XdgMoEConfig
+from transformers.modeling_outputs import TokenClassifierOutput
+
+
+if is_flash_attn_2_available():
+    from flash_attn import flash_attn_func, flash_attn_varlen_func
+    from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
+
+
+# This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
+# It means that the function will not be traced through and simply appear as a node in the graph.
+if is_torch_fx_available():
+    if not is_torch_greater_or_equal_than_1_13:
+        import torch.fx
+
+    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)
+
+
+logger = logging.get_logger(__name__)
+
+_CONFIG_FOR_DOC = "XdgMoEConfig"
+
+
+def _get_unpad_data(attention_mask):
+    """Return ``(indices, cu_seqlens, max_seqlen_in_batch)`` computed from ``attention_mask``.
+
+    ``indices`` are the flat positions of the non-zero mask entries, ``cu_seqlens`` the int32 cumulative
+    per-row mask sums with a leading 0, and ``max_seqlen_in_batch`` the largest per-row sum (Python int).
+    """
+    seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
+    indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))  # plain `torch.int32`, not `torch.torch.int32`
+    return indices, cu_seqlens, seqlens_in_batch.max().item()
+
+
+def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None):
+    """Deprecated shim: emit a warning, then delegate to ``_prepare_4d_attention_mask`` with the same arguments."""
+    deprecation_msg = "Calling `transformers.models.XdgMoE.modeling_XdgMoE._prepare_4d_attention_mask` is deprecated and will be removed in v4.37. Use `transformers.modeling_attn_mask_utils._prepare_4d_attention_mask"
+    warnings.warn(deprecation_msg)
+    return _prepare_4d_attention_mask(mask=mask, dtype=dtype, tgt_len=tgt_len)
+
+
+def _make_causal_mask(
+    input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
+):
+    """Deprecated shim: emit a warning, then delegate to ``AttentionMaskConverter._make_causal_mask``."""
+    deprecation_msg = "Calling `transformers.models.XdgMoE.modeling_XdgMoE._make_causal_mask` is deprecated and will be removed in v4.37. Use `transformers.models.XdgMoE.modeling_XdgMoE.AttentionMaskConverter._make_causal_mask"
+    warnings.warn(deprecation_msg)
+    # All arguments are forwarded unchanged, by keyword.
+    forwarded = dict(input_ids_shape=input_ids_shape, dtype=dtype, device=device, past_key_values_length=past_key_values_length)
+    return AttentionMaskConverter._make_causal_mask(**forwarded)
+
+
+class XdgMoERMSNorm(nn.Module):
+    """Root-mean-square norm with a learnable per-feature scale (equivalent to T5LayerNorm)."""
+
+    def __init__(self, hidden_size, eps=1e-6):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(hidden_size))
+        self.variance_epsilon = eps
+
+    def forward(self, hidden_states):
+        """Normalize over the last dim in float32, cast back to the input dtype, then apply the scale."""
+        orig_dtype = hidden_states.dtype
+        states_fp32 = hidden_states.to(torch.float32)
+        mean_square = states_fp32.pow(2).mean(-1, keepdim=True)
+        normed = states_fp32 * torch.rsqrt(mean_square + self.variance_epsilon)
+        return self.weight * normed.to(orig_dtype)
+
+
+ALL_LAYERNORM_LAYERS.append(XdgMoERMSNorm)
+
+
+class XdgMoERotaryEmbedding(nn.Module):
+    """Rotary position embedding backed by cached cos/sin tables (``[seq_len, dim]`` for even ``dim``)."""
+
+    def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
+        super().__init__()
+
+        self.dim = dim
+        self.max_position_embeddings = max_position_embeddings
+        self.base = base
+        inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
+
+        # Build here to make `torch.jit.trace` work.
+        self._set_cos_sin_cache(seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype())
+        # Marking the cache as unset makes the first `forward` rebuild it on the input's device and dtype.
+        self.max_seq_len_cached = None
+
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        """(Re)build the ``cos_cached`` / ``sin_cached`` buffers for positions ``0 .. seq_len - 1``."""
+        self.max_seq_len_cached = seq_len
+        t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
+
+        freqs = torch.outer(t, self.inv_freq.to(t.device))
+        # Different from paper, but it uses a different permutation in order to obtain the same calculation
+        emb = torch.cat((freqs, freqs), dim=-1)
+        self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False)
+        self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False)
+
+    def forward(self, x, seq_len=None):
+        """Return ``(cos, sin)`` for the first ``seq_len`` positions in ``x.dtype``; ``seq_len`` defaults to ``x.shape[-2]``."""
+        # x: [bs, num_attention_heads, seq_len, head_size]
+        if seq_len is None:
+            seq_len = x.shape[-2]  # an omitted seq_len used to crash in the comparison / `torch.arange`
+        if self.max_seq_len_cached is None or seq_len > self.max_seq_len_cached:
+            self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
+        return self.cos_cached[:seq_len].to(dtype=x.dtype), self.sin_cached[:seq_len].to(dtype=x.dtype)
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaLinearScalingRotaryEmbedding with Llama->XdgMoE
+class XdgMoELinearScalingRotaryEmbedding(XdgMoERotaryEmbedding):
+    """XdgMoERotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendev"""
+
+    def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, scaling_factor=1.0):
+        # Set before the parent constructor runs, because that constructor already builds the cache.
+        self.scaling_factor = scaling_factor
+        super().__init__(dim, max_position_embeddings, base, device)
+
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        """Rebuild the cos/sin cache with the position indices divided by ``scaling_factor``."""
+        self.max_seq_len_cached = seq_len
+        t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype) / self.scaling_factor
+        # `inv_freq` is moved to `t`'s device, as in the base class, so a cache rebuilt on another device works.
+        freqs = torch.outer(t, self.inv_freq.to(t.device))
+        emb = torch.cat((freqs, freqs), dim=-1)  # different permutation than the paper, same calculation
+        self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False)
+        self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False)
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaDynamicNTKScalingRotaryEmbedding with Llama->XdgMoE
+class XdgMoEDynamicNTKScalingRotaryEmbedding(XdgMoERotaryEmbedding):
+    """XdgMoERotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla"""
+
+    def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, scaling_factor=1.0):
+        # Set before the parent constructor runs, because that constructor already builds the cache.
+        self.scaling_factor = scaling_factor
+        super().__init__(dim, max_position_embeddings, base, device)
+
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        """Rebuild the cos/sin cache; past ``max_position_embeddings`` the rotary base is rescaled first."""
+        self.max_seq_len_cached = seq_len
+        if seq_len > self.max_position_embeddings:
+            base = self.base * (
+                (self.scaling_factor * seq_len / self.max_position_embeddings) - (self.scaling_factor - 1)
+            ) ** (self.dim / (self.dim - 2))
+            inv_freq = 1.0 / (base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
+            self.register_buffer("inv_freq", inv_freq, persistent=False)
+
+        t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
+        # `inv_freq` is moved to `t`'s device, as in the base class, so a cache rebuilt on another device works.
+        freqs = torch.outer(t, self.inv_freq.to(t.device))
+        emb = torch.cat((freqs, freqs), dim=-1)  # different permutation than the paper, same calculation
+        self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False)
+        self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False)
+
+
+# Copied from transformers.models.llama.modeling_llama.rotate_half
+def rotate_half(x):
+    """Rotates half the hidden dims of the input: ``[x1, x2] -> [-x2, x1]`` along the last dim."""
+    half = x.shape[-1] // 2
+    front, back = x[..., :half], x[..., half:]
+    return torch.cat((-back, front), dim=-1)
+
+
+# Copied from transformers.models.llama.modeling_llama.apply_rotary_pos_emb
+def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
+    """Rotate the query and key tensors with Rotary Position Embedding.
+
+    Args:
+        q (`torch.Tensor`): The query tensor.
+        k (`torch.Tensor`): The key tensor.
+        cos (`torch.Tensor`): The cosine part of the rotary embedding.
+        sin (`torch.Tensor`): The sine part of the rotary embedding.
+        position_ids (`torch.Tensor`):
+            Indices used to gather rows of `cos` and `sin` for the tokens of q and k; offset
+            position ids can be passed here when working with a KV-cache.
+        unsqueeze_dim (`int`, *optional*, defaults to 1):
+            Dimension inserted into cos[position_ids] and sin[position_ids] so they broadcast
+            against q and k. With cos[position_ids] of shape [batch_size, seq_len, head_dim], use 1
+            for q and k shaped [batch_size, heads, seq_len, head_dim] and 2 for
+            [batch_size, seq_len, heads, head_dim].
+
+    Returns:
+        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
+    """
+    # Gather the table rows for each position, then add the broadcast axis.
+    cos_sel = cos[position_ids].unsqueeze(unsqueeze_dim)
+    sin_sel = sin[position_ids].unsqueeze(unsqueeze_dim)
+
+    def _rotate(states):
+        # x * cos + rotate_half(x) * sin
+        return (states * cos_sel) + (rotate_half(states) * sin_sel)
+
+    # The same rotation is applied to the query and to the key.
+    return _rotate(q), _rotate(k)
+
+
+class XdgMoEMLP(nn.Module):
+    """Gated feed-forward block: ``down_proj(act_fn(gate_proj(x)) * up_proj(x))``, all projections bias-free."""
+
+    def __init__(self, config, hidden_size = None, intermediate_size = None):
+        super().__init__()
+        self.config = config
+        # Explicit sizes override the config defaults.
+        self.hidden_size = hidden_size if hidden_size is not None else config.hidden_size
+        self.intermediate_size = intermediate_size if intermediate_size is not None else config.intermediate_size
+
+        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
+        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
+        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
+        self.act_fn = ACT2FN[config.hidden_act]
+
+    def forward(self, x):
+        """Apply the MLP; with ``config.pretraining_tp > 1`` the matmuls are evaluated slice by slice."""
+        tp = self.config.pretraining_tp
+        if tp <= 1:
+            return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
+
+        # Sliced path: every weight is split into chunks of `intermediate_size // tp` along its intermediate axis.
+        chunk = self.intermediate_size // tp
+        gate_w = self.gate_proj.weight.split(chunk, dim=0)
+        up_w = self.up_proj.weight.split(chunk, dim=0)
+        down_w = self.down_proj.weight.split(chunk, dim=1)
+
+        gate_out = torch.cat([F.linear(x, gate_w[i]) for i in range(tp)], dim=-1)
+        up_out = torch.cat([F.linear(x, up_w[i]) for i in range(tp)], dim=-1)
+
+        hidden_parts = (self.act_fn(gate_out) * up_out).split(chunk, dim=2)
+        # The per-slice outputs of the down projection are added together.
+        return sum([F.linear(hidden_parts[i], down_w[i]) for i in range(tp)])
+
+
+class MoEGate(nn.Module):  # Router: scores every token against the routed experts and selects top-k of them per token.
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+        self.top_k = config.num_experts_per_tok
+        self.n_routed_experts = config.n_routed_experts
+
+        self.scoring_func = config.scoring_func
+        self.alpha = config.aux_loss_alpha
+        self.seq_aux = config.seq_aux
+
+        # top-k selection: renormalization flag and the router weight (one row per routed expert)
+        self.norm_topk_prob = config.norm_topk_prob
+        self.gating_dim = config.hidden_size
+        self.weight = nn.Parameter(torch.empty((self.n_routed_experts, self.gating_dim)))
+        if self.scoring_func == "noaux_tc":
+            self.register_buffer("e_score_correction_bias", torch.empty(self.n_routed_experts, dtype=torch.bfloat16))
+            # NOTE(review): created with torch.empty and not filled by reset_parameters() — presumably loaded from a checkpoint; confirm.
+        self.reset_parameters()
+        self.inspect = nn.Identity()  # NOTE(review): inspect* are never called in this class — presumably hook points; confirm.
+        self.inspect2 = nn.Identity()
+        self.inspect3 = nn.Identity()
+        self.inspect4 = nn.Identity()
+
+
+    def reset_parameters(self) -> None:  # Kaiming-uniform init of the router weight only
+        import torch.nn.init  as init
+        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+    
+    def forward(self, hidden_states):  # returns (topk_idx, topk_weight, aux_loss); aux_loss is None unless training with alpha > 0
+        bsz, seq_len, h = hidden_states.shape        
+        ### compute gating score: tokens are flattened to [bsz * seq_len, h], giving one logit per (token, expert)
+        hidden_states = hidden_states.view(-1, h)
+        if self.config.moe_gating_fp32:
+            logits = F.linear(hidden_states.type(torch.float32), self.weight.type(torch.float32), None)
+        else:
+            logits = F.linear(hidden_states, self.weight, None)
+        if self.scoring_func == 'softmax':
+            scores = logits.softmax(dim=-1, dtype=torch.float32).type_as(logits)
+            ### select top-k experts
+            topk_weight, topk_idx = torch.topk(scores, k=self.top_k, dim=-1, sorted=False)
+        elif self.scoring_func == 'aux_loss_post_softmax':
+            # TODO: training not supported
+            topk_weight, topk_idx = torch.topk(logits, k=self.top_k, dim=1, sorted=False)
+            topk_weight = torch.softmax(topk_weight, dim=-1, dtype=torch.float32).type_as(logits)
+            scores = logits
+        elif self.scoring_func == "noaux_tc":
+            scores = logits.sigmoid()
+            scores_for_choice = scores.view(bsz * seq_len, -1) + self.e_score_correction_bias.unsqueeze(0)
+            _, topk_idx = torch.topk(scores_for_choice, k=self.top_k, dim=-1, sorted=False)
+            topk_weight = scores.gather(1, topk_idx)
+        else:
+            raise NotImplementedError(f'insupportable scoring function for MoE gating: {self.scoring_func}')
+        
+        ### normalize the selected weights to sum to 1 (when norm_topk_prob is set or for "noaux_tc"), then apply routed_scaling_factor
+        if self.top_k > 1 and (self.norm_topk_prob or self.scoring_func == "noaux_tc"):
+            denominator = topk_weight.sum(dim=-1, keepdim=True) + 1e-20
+            topk_weight = topk_weight / denominator
+        topk_weight = topk_weight * self.config.routed_scaling_factor
+        
+        
+        ### expert-level auxiliary loss: (selection frequency * mean score) summed over experts, scaled by alpha
+        if self.training and self.alpha > 0.0:
+            scores_for_aux = scores
+            aux_topk = self.top_k
+            topk_idx_for_aux_loss = topk_idx.view(bsz, seq_len, -1)
+            scores_for_seq_aux = scores_for_aux.view(bsz, seq_len, -1)
+            ce = torch.zeros(bsz, seq_len, self.n_routed_experts, device=hidden_states.device)
+            ce.scatter_add_(
+                2,
+                topk_idx_for_aux_loss,
+                torch.ones(bsz, seq_len, aux_topk, device=hidden_states.device),
+            )
+            fi = ce
+            pi = scores_for_seq_aux
+            # seq_aux: statistics are taken per sequence and the loss averaged over the batch; otherwise over all tokens at once
+            if self.seq_aux:
+                fii = fi.mean(1).div(aux_topk).mul(self.n_routed_experts).view(bsz, -1)
+                pii = pi.mean(1)
+                aux_loss = (fii * pii).sum(dim=1).mean() * self.alpha
+            else:
+                fii = fi.mean([0, 1]).div(aux_topk).mul(self.n_routed_experts)
+                pii = pi.mean([0, 1])
+                aux_loss = (fii * pii).sum() * self.alpha
+        else:
+            aux_loss = None
+        return topk_idx, topk_weight, aux_loss
+
+
+class AddAuxiliaryLoss(torch.autograd.Function):
+    """
+    Identity on ``x`` in the forward pass that also feeds a gradient into the
+    auxiliary (aux) loss during backpropagation, without changing the output.
+    """
+    @staticmethod
+    def forward(ctx, x, loss):
+        assert loss.numel() == 1
+        # Remember what backward needs: whether the loss wants a gradient at all, and its dtype.
+        ctx.required_aux_loss = loss.requires_grad
+        ctx.dtype = loss.dtype
+        return x
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        # `x` receives the incoming gradient unchanged; the aux loss receives a gradient of one (or None).
+        grad_loss = torch.ones(1, dtype=ctx.dtype, device=grad_output.device) if ctx.required_aux_loss else None
+        return grad_output, grad_loss
+    
+    
+class XdgMoEMoE(nn.Module):
+    """
+    A mixed expert module containing shared experts.
+    """
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+        self.num_experts_per_tok = config.num_experts_per_tok
+        self.experts = nn.ModuleList([XdgMoEMLP(config, intermediate_size = config.moe_intermediate_size) for i in range(config.n_routed_experts)])
+        self.gate = MoEGate(config)
+        # Running per-expert token counts from training forwards; a plain tensor, not a registered buffer.
+        self.assigned_tokens_per_expert = torch.zeros(
+            config.n_routed_experts, device=torch.cuda.current_device(), dtype=torch.bfloat16
+        )
+        self.router_expert_score_correction_coeff = self.config.router_expert_score_correction_coeff
+        if config.n_shared_experts is not None:
+            intermediate_size = config.moe_intermediate_size * config.n_shared_experts
+            self.shared_experts = XdgMoEMLP(config=config, intermediate_size = intermediate_size)
+
+    def reset_statistics(self):
+        """Reset the per-expert token counters to zero."""
+        self.assigned_tokens_per_expert.zero_()
+
+    def update_correction_bias(self):
+        """Raise the gate bias by the coeff for experts below the mean load, lower it for those above, then reset the counters."""
+        mean_ = self.assigned_tokens_per_expert.float().mean()
+        with torch.no_grad():
+            self.gate.e_score_correction_bias.data = (
+                self.gate.e_score_correction_bias
+                + (mean_ - self.assigned_tokens_per_expert).sign()
+                * self.router_expert_score_correction_coeff
+            )
+
+        self.reset_statistics()
+
+    def forward(self, hidden_states):
+        """Send each token through its top-k routed experts, combine the weighted outputs, and add the shared experts if configured."""
+        identity = hidden_states
+        orig_shape = hidden_states.shape
+        topk_idx, topk_weight, aux_loss = self.gate(hidden_states)
+        hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
+        flat_topk_idx = topk_idx.view(-1)
+        if self.training:
+            hidden_states = hidden_states.repeat_interleave(self.num_experts_per_tok, dim=0)
+            y = torch.empty_like(hidden_states)
+            for i, expert in enumerate(self.experts):
+                y[flat_topk_idx == i] = expert(hidden_states[flat_topk_idx == i]).to(hidden_states.dtype)
+            y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1)
+            y =  y.to(hidden_states.dtype).view(*orig_shape)
+            self.assigned_tokens_per_expert += torch.bincount(flat_topk_idx, minlength=self.config.n_routed_experts)
+            # `is not None` instead of tensor truthiness: no host sync, and an exactly-zero loss stays attached.
+            if aux_loss is not None:
+                y = AddAuxiliaryLoss.apply(y, aux_loss)
+        else:
+            y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape)
+        if self.config.n_shared_experts is not None:
+            y = y + self.shared_experts(identity)
+        return y
+
+    @torch.no_grad()
+    def moe_infer(self, x, flat_expert_indices, flat_expert_weights):
+        """Gradient-free path: run each expert once on its tokens and sum the weighted outputs back per token."""
+        expert_cache = torch.zeros_like(x, dtype=flat_expert_weights.dtype)
+        idxs = flat_expert_indices.argsort()
+        tokens_per_expert = flat_expert_indices.bincount().cpu().numpy().cumsum(0)
+        token_idxs = idxs // self.num_experts_per_tok
+        for i, end_idx in enumerate(tokens_per_expert):
+            start_idx = 0 if i == 0 else tokens_per_expert[i-1]
+            if start_idx == end_idx:
+                continue
+            expert = self.experts[i]
+            exp_token_idx = token_idxs[start_idx:end_idx]
+            expert_tokens = x[exp_token_idx]
+            expert_out = expert(expert_tokens).type_as(flat_expert_weights)
+            expert_out.mul_(flat_expert_weights[idxs[start_idx:end_idx]])
+            expert_cache.scatter_reduce_(0, exp_token_idx.view(-1, 1).repeat(1, x.shape[-1]), expert_out, reduce='sum')
+        return expert_cache.type_as(x)
+
+
+# Copied from transformers.models.llama.modeling_llama.repeat_kv
+def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
+    """
+    Repeat every key/value head ``n_rep`` times, like torch.repeat_interleave(x, dim=1, repeats=n_rep):
+    (batch, num_key_value_heads, seqlen, head_dim) -> (batch, num_attention_heads, seqlen, head_dim).
+    """
+    batch, kv_heads, seq_len, head_dim = hidden_states.shape
+    if n_rep != 1:
+        expanded = hidden_states.unsqueeze(2).expand(batch, kv_heads, n_rep, seq_len, head_dim)
+        return expanded.reshape(batch, kv_heads * n_rep, seq_len, head_dim)
+    return hidden_states
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaAttention with Llama->XdgMoE
+class XdgMoEAttention(nn.Module):
+    """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+    def __init__(self, config: XdgMoEConfig, layer_idx: Optional[int] = None):
+        super().__init__()
+        self.config = config
+        self.layer_idx = layer_idx
+        if layer_idx is None:
+            logger.warning_once(
+                f"Instantiating {self.__class__.__name__} without passing `layer_idx` is not recommended and will "
+                "to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` "
+                "when creating this class."
+            )
+
+        self.attention_dropout = config.attention_dropout
+        self.hidden_size = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.head_dim = getattr(config, "head_dim", self.hidden_size // self.num_heads)
+        self.num_key_value_heads = config.num_key_value_heads
+        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+        self.max_position_embeddings = config.max_position_embeddings
+        self.rope_theta = config.rope_theta
+        self.is_causal = True
+        self.qk_layernorm = config.qk_layernorm
+
+        # if (self.head_dim * self.num_heads) != self.hidden_size:
+        #     raise ValueError(
+        #         f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
+        #         f" and `num_heads`: {self.num_heads})."
+        #     )
+
+        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.attention_bias)
+        self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
+        self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
+        self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
+        self._init_rope()
+        
+        if self.qk_layernorm:
+            self.q_layernorm = XdgMoERMSNorm(self.head_dim, eps=config.rms_norm_eps)
+            self.k_layernorm = XdgMoERMSNorm(self.head_dim, eps=config.rms_norm_eps)
+        else:
+            self.q_layernorm = None
+            self.k_layernorm = None
+
+    def _init_rope(self):
+        if self.config.rope_scaling is None:
+            self.rotary_emb = XdgMoERotaryEmbedding(
+                self.head_dim,
+                max_position_embeddings=self.max_position_embeddings,
+                base=self.rope_theta,
+            )
+
+        else:
+            scaling_type = self.config.rope_scaling["type"]
+            scaling_factor = self.config.rope_scaling["factor"]
+            if scaling_type == "linear":
+                self.rotary_emb = XdgMoELinearScalingRotaryEmbedding(
+                    self.head_dim,
+                    max_position_embeddings=self.max_position_embeddings,
+                    scaling_factor=scaling_factor,
+                    base=self.rope_theta,
+                )
+            elif scaling_type == "dynamic":
+                self.rotary_emb = XdgMoEDynamicNTKScalingRotaryEmbedding(
+                    self.head_dim,
+                    max_position_embeddings=self.max_position_embeddings,
+                    scaling_factor=scaling_factor,
+                    base=self.rope_theta,
+                )
+            else:
+                raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
+
+    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: bool = False,
+        use_cache: bool = False,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        if "padding_mask" in kwargs:
+            warnings.warn(
+                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
+            )
+
+        bsz, q_len, _ = hidden_states.size()
+
+        if self.config.pretraining_tp > 1:
+            key_value_slicing = (self.num_key_value_heads * self.head_dim) // self.config.pretraining_tp
+            query_slices = self.q_proj.weight.split(
+                (self.num_heads * self.head_dim) // self.config.pretraining_tp, dim=0
+            )
+            key_slices = self.k_proj.weight.split(key_value_slicing, dim=0)
+            value_slices = self.v_proj.weight.split(key_value_slicing, dim=0)
+
+            query_states = [F.linear(hidden_states, query_slices[i]) for i in range(self.config.pretraining_tp)]
+            query_states = torch.cat(query_states, dim=-1)
+
+            key_states = [F.linear(hidden_states, key_slices[i]) for i in range(self.config.pretraining_tp)]
+            key_states = torch.cat(key_states, dim=-1)
+
+            value_states = [F.linear(hidden_states, value_slices[i]) for i in range(self.config.pretraining_tp)]
+            value_states = torch.cat(value_states, dim=-1)
+
+        else:
+            query_states = self.q_proj(hidden_states)
+            key_states = self.k_proj(hidden_states)
+            value_states = self.v_proj(hidden_states)
+
+        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        
+        if self.qk_layernorm:
+            query_states = self.q_layernorm(query_states)
+            key_states = self.k_layernorm(key_states)
+
+        kv_seq_len = key_states.shape[-2]
+        if past_key_value is not None:
+            if self.layer_idx is None:
+                raise ValueError(
+                    f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} "
+                    "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class "
+                    "with a layer index."
+                )
+            kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+
+        if past_key_value is not None:
+            cache_kwargs = {"sin": sin, "cos": cos}  # Specific to RoPE models
+            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+
+        key_states = repeat_kv(key_states, self.num_key_value_groups)
+        value_states = repeat_kv(value_states, self.num_key_value_groups)
+
+        attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
+
+        if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
+            raise ValueError(
+                f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
+                f" {attn_weights.size()}"
+            )
+
+        if attention_mask is not None:
+            if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+                raise ValueError(
+                    f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+                )
+            attn_weights = attn_weights + attention_mask
+
+        # upcast attention to fp32
+        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
+        attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training)
+        attn_output = torch.matmul(attn_weights, value_states)
+
+        if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+            raise ValueError(
+                f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+                f" {attn_output.size()}"
+            )
+
+        attn_output = attn_output.transpose(1, 2).contiguous()
+
+        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+        if self.config.pretraining_tp > 1:
+            attn_output = attn_output.split(self.hidden_size // self.config.pretraining_tp, dim=2)
+            o_proj_slices = self.o_proj.weight.split(self.hidden_size // self.config.pretraining_tp, dim=1)
+            attn_output = sum([F.linear(attn_output[i], o_proj_slices[i]) for i in range(self.config.pretraining_tp)])
+        else:
+            attn_output = self.o_proj(attn_output)
+
+        if not output_attentions:
+            attn_weights = None
+
+        return attn_output, attn_weights, past_key_value
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 with Llama->XdgMoE
+class XdgMoEFlashAttention2(XdgMoEAttention):
+    """
+    XdgMoE flash attention module. This module inherits from `XdgMoEAttention` as the weights of the module stays
+    untouched. The only required change would be on the forward pass where it needs to correctly call the public API of
+    flash attention and deal with padding tokens in case the input contains any of them.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
+        # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
+        # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
+        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: bool = False,
+        use_cache: bool = False,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        # XdgMoEFlashAttention2 attention does not support output_attentions
+        if "padding_mask" in kwargs:
+            warnings.warn(
+                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
+            )
+
+            # overwrite attention_mask with padding_mask
+            attention_mask = kwargs.pop("padding_mask")
+
+        output_attentions = False
+
+        bsz, q_len, _ = hidden_states.size()
+        query_states = self.q_proj(hidden_states)
+        key_states = self.k_proj(hidden_states)
+        value_states = self.v_proj(hidden_states)
+         
+        # Flash attention requires the input to have the shape
+        # batch_size x seq_length x head_dim x hidden_dim
+        # therefore we just need to keep the original shape
+        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        
+        if self.qk_layernorm:
+            query_states = self.q_layernorm(query_states)
+            key_states = self.k_layernorm(key_states)
+        
+        kv_seq_len = key_states.shape[-2]
+        if past_key_value is not None:
+            kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+        if past_key_value is not None:
+            cache_kwargs = {"sin": sin, "cos": cos}  # Specific to RoPE models
+            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+
+        # TODO: These transpose are quite inefficient but Flash Attention requires the layout [batch_size, sequence_length, num_heads, head_dim]. We would need to refactor the KV cache
+        # to be able to avoid many of these transpose/reshape/view.
+        query_states = query_states.transpose(1, 2)
+        key_states = key_states.transpose(1, 2)
+        value_states = value_states.transpose(1, 2)
+        dropout_rate = self.attention_dropout if self.training else 0.0
+
+        # In PEFT, usually we cast the layer norms in float32 for training stability reasons
+        # therefore the input hidden states gets silently casted in float32. Hence, we need
+        # cast them back in the correct dtype just to be sure everything works as expected.
+        # This might slowdown training & inference so it is recommended to not cast the LayerNorms
+        # in fp32. (XdgMoERMSNorm handles it correctly)
+        
+        input_dtype = query_states.dtype
+        if input_dtype == torch.float32:
+            # Handle the case where the model is quantized
+            if hasattr(self.config, "_pre_quantization_dtype"):
+                target_dtype = self.config._pre_quantization_dtype
+            elif torch.is_autocast_enabled():
+                target_dtype = torch.get_autocast_gpu_dtype()
+            else:
+                target_dtype = self.q_proj.weight.dtype
+
+            logger.warning_once(
+                f"The input hidden states seems to be silently casted in float32, this might be related to"
+                f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in"
+                f" {target_dtype}."
+            )
+
+            query_states = query_states.to(target_dtype)
+            key_states = key_states.to(target_dtype)
+            value_states = value_states.to(target_dtype)
+        
+        attn_output = self._flash_attention_forward(
+            query_states, key_states, value_states, attention_mask, q_len, dropout=dropout_rate
+        )
+        
+        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
+        attn_output = self.o_proj(attn_output)
+
+        if not output_attentions:
+            attn_weights = None
+
+        return attn_output, attn_weights, past_key_value
+
+    def _flash_attention_forward(
+        self, query_states, key_states, value_states, attention_mask, query_length, dropout=0.0, softmax_scale=None
+    ):
+        """
+        Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
+        first unpad the input, then computes the attention scores and pad the final attention scores.
+
+        Args:
+            query_states (`torch.Tensor`):
+                Input query states to be passed to Flash Attention API
+            key_states (`torch.Tensor`):
+                Input key states to be passed to Flash Attention API
+            value_states (`torch.Tensor`):
+                Input value states to be passed to Flash Attention API
+            attention_mask (`torch.Tensor`):
+                The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the
+                position of padding tokens and 1 for the position of non-padding tokens.
+            dropout (`int`, *optional*):
+                Attention dropout
+            softmax_scale (`float`, *optional*):
+                The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim)
+        """
+        if not self._flash_attn_uses_top_left_mask:
+            causal = self.is_causal
+        else:
+            # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in XdgMoEFlashAttention2 __init__.
+            causal = self.is_causal and query_length != 1
+
+        # Contains at least one padding token in the sequence
+        if attention_mask is not None:
+            batch_size = query_states.shape[0]
+            query_states, key_states, value_states, indices_q, cu_seq_lens, max_seq_lens = self._upad_input(
+                query_states, key_states, value_states, attention_mask, query_length
+            )
+            
+            cu_seqlens_q, cu_seqlens_k = cu_seq_lens
+            max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens
+
+            attn_output_unpad = flash_attn_varlen_func(
+                query_states,
+                key_states,
+                value_states,
+                cu_seqlens_q=cu_seqlens_q,
+                cu_seqlens_k=cu_seqlens_k,
+                max_seqlen_q=max_seqlen_in_batch_q,
+                max_seqlen_k=max_seqlen_in_batch_k,
+                dropout_p=dropout,
+                softmax_scale=softmax_scale,
+                causal=causal,
+            )
+            attn_output = pad_input(attn_output_unpad, indices_q, batch_size, query_length)
+        else:
+            attn_output = flash_attn_func(
+                query_states, key_states, value_states, dropout, softmax_scale=softmax_scale, causal=causal
+            )
+
+        return attn_output
+
+    def _upad_input(self, query_layer, key_layer, value_layer, attention_mask, query_length):
+        indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask)
+        batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape
+
+        key_layer = index_first_axis(
+            key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k
+        )
+        value_layer = index_first_axis(
+            value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k
+        )
+        if query_length == kv_seq_len:
+            query_layer = index_first_axis(
+                query_layer.reshape(batch_size * kv_seq_len, self.num_heads, head_dim), indices_k
+            )
+            cu_seqlens_q = cu_seqlens_k
+            max_seqlen_in_batch_q = max_seqlen_in_batch_k
+            indices_q = indices_k
+        elif query_length == 1:
+            max_seqlen_in_batch_q = 1
+            cu_seqlens_q = torch.arange(
+                batch_size + 1, dtype=torch.int32, device=query_layer.device
+            )  # There is a memcpy here, that is very bad.
+            indices_q = cu_seqlens_q[:-1]
+            query_layer = query_layer.squeeze(1)
+        else:
+            # The -q_len: slice assumes left padding.
+            attention_mask = attention_mask[:, -query_length:]
+            query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(query_layer, attention_mask)
+
+        return (
+            query_layer,
+            key_layer,
+            value_layer,
+            indices_q,
+            (cu_seqlens_q, cu_seqlens_k),
+            (max_seqlen_in_batch_q, max_seqlen_in_batch_k),
+        )
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaSdpaAttention with Llama->XdgMoE
+class XdgMoESdpaAttention(XdgMoEAttention):
+    """
+    XdgMoE attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from
+    `XdgMoEAttention` as the weights of the module stays untouched. The only changes are on the forward pass to adapt to
+    SDPA API.
+    """
+
+    # Adapted from XdgMoEAttention.forward
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: bool = False,
+        use_cache: bool = False,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        if output_attentions:
+            # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
+            logger.warning_once(
+                "XdgMoEModel is using XdgMoESdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to the manual attention implementation, "
+                'but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.'
+            )
+            return super().forward(
+                hidden_states=hidden_states,
+                attention_mask=attention_mask,
+                position_ids=position_ids,
+                past_key_value=past_key_value,
+                output_attentions=output_attentions,
+                use_cache=use_cache,
+            )
+
+        bsz, q_len, _ = hidden_states.size()
+
+        query_states = self.q_proj(hidden_states)
+        key_states = self.k_proj(hidden_states)
+        value_states = self.v_proj(hidden_states)
+
+        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        
+        if self.qk_layernorm:
+            query_states = self.q_layernorm(query_states)
+            key_states = self.k_layernorm(key_states)
+
+        kv_seq_len = key_states.shape[-2]
+        if past_key_value is not None:
+            kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+
+        if past_key_value is not None:
+            cache_kwargs = {"sin": sin, "cos": cos}  # Specific to RoPE models
+            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+
+        key_states = repeat_kv(key_states, self.num_key_value_groups)
+        value_states = repeat_kv(value_states, self.num_key_value_groups)
+
+        if attention_mask is not None:
+            if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+                raise ValueError(
+                    f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+                )
+
+        # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask,
+        # Reference: https://github.com/pytorch/pytorch/issues/112577.
+        if query_states.device.type == "cuda" and attention_mask is not None:
+            query_states = query_states.contiguous()
+            key_states = key_states.contiguous()
+            value_states = value_states.contiguous()
+
+        attn_output = torch.nn.functional.scaled_dot_product_attention(
+            query_states,
+            key_states,
+            value_states,
+            attn_mask=attention_mask,
+            dropout_p=self.attention_dropout if self.training else 0.0,
+            # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1.
+            is_causal=self.is_causal and attention_mask is None and q_len > 1,
+        )
+
+        attn_output = attn_output.transpose(1, 2).contiguous()
+        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+        attn_output = self.o_proj(attn_output)
+
+        return attn_output, None, past_key_value
+
+
+# Registry of attention implementations; XdgMoEDecoderLayer indexes it with
+# `config._attn_implementation` to choose the class it instantiates.
+XdgMoE_ATTENTION_CLASSES = {
+    "eager": XdgMoEAttention,
+    "flash_attention_2": XdgMoEFlashAttention2,
+    "sdpa": XdgMoESdpaAttention,
+}
+
+
+class XdgMoEDecoderLayer(nn.Module):
+    def __init__(self, config: XdgMoEConfig, layer_idx: int):
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.layer_idx = layer_idx
+        self.config = config
+        self.self_attn = XdgMoE_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx)
+        self.mlp = XdgMoEMoE(config) if (config.n_routed_experts is not None and  \
+                                           layer_idx >= config.first_k_dense_replace and layer_idx % config.moe_layer_freq == 0) \
+                                        else XdgMoEMLP(config)
+        self.input_layernorm = XdgMoERMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = XdgMoERMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Tuple[torch.Tensor]] = None,
+        output_attentions: Optional[bool] = False,
+        use_cache: Optional[bool] = False,
+        **kwargs,
+    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+        """
+        Args:
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+            attention_mask (`torch.FloatTensor`, *optional*):
+                attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
+                query_sequence_length, key_sequence_length)` if default attention is used.
+            output_attentions (`bool`, *optional*):
+                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+                returned tensors for more detail.
+            use_cache (`bool`, *optional*):
+                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
+                (see `past_key_values`).
+            past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
+        """
+        if "padding_mask" in kwargs:
+            warnings.warn(
+                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
+            )
+        residual = hidden_states
+
+        hidden_states = self.input_layernorm(hidden_states)
+
+        # Self Attention
+        hidden_states, self_attn_weights, present_key_value = self.self_attn(
+            hidden_states=hidden_states,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_value=past_key_value,
+            output_attentions=output_attentions,
+            use_cache=use_cache,
+            **kwargs,
+        )
+        hidden_states = residual + hidden_states
+        
+        # Fully Connected
+        residual = hidden_states
+        hidden_states = self.post_attention_layernorm(hidden_states)
+        hidden_states = self.mlp(hidden_states)
+        hidden_states = residual + hidden_states
+
+        outputs = (hidden_states,)
+
+        if output_attentions:
+            outputs += (self_attn_weights,)
+
+        if use_cache:
+            outputs += (present_key_value,)
+
+        return outputs
+
+
+# Docstring preamble describing the `config` parameter; it is passed to
+# `add_start_docstrings` when decorating XdgMoEPreTrainedModel.
+XdgMoE_START_DOCSTRING = r"""
+    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
+    etc.)
+
+    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+    and behavior.
+
+    Parameters:
+        config ([`XdgMoEConfig`]):
+            Model configuration class with all the parameters of the model. Initializing with a config file does not
+            load the weights associated with the model, only the configuration. Check out the
+            [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+"""
+
+
+@add_start_docstrings(
+    "The bare XdgMoE Model outputting raw hidden-states without any specific head on top.",
+    XdgMoE_START_DOCSTRING,
+)
+class XdgMoEPreTrainedModel(PreTrainedModel):
+    config_class = XdgMoEConfig
+    base_model_prefix = "model"
+    supports_gradient_checkpointing = True
+    _no_split_modules = ["XdgMoEDecoderLayer"]
+    _skip_keys_device_placement = "past_key_values"
+    _supports_flash_attn_2 = True
+    _supports_sdpa = True
+    _supports_cache_class = True
+
+    def _init_weights(self, module):
+        std = self.config.initializer_range
+        if isinstance(module, nn.Linear):
+            module.weight.data.normal_(mean=0.0, std=std)
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.Embedding):
+            module.weight.data.normal_(mean=0.0, std=std)
+            if module.padding_idx is not None:
+                module.weight.data[module.padding_idx].zero_()
+
+
+XdgMoE_INPUTS_DOCSTRING = r"""
+    Args:
+        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+            Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
+            it.
+
+            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            [`PreTrainedTokenizer.__call__`] for details.
+
+            [What are input IDs?](../glossary#input-ids)
+        attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+
+            [What are attention masks?](../glossary#attention-mask)
+
+            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            [`PreTrainedTokenizer.__call__`] for details.
+
+            If `past_key_values` is used, optionally only the last `input_ids` have to be input (see
+            `past_key_values`).
+
+            If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
+            and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
+            information on the default strategy.
+
+            - 1 indicates the head is **not masked**,
+            - 0 indicates the head is **masked**.
+        position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.n_positions - 1]`.
+
+            [What are position IDs?](../glossary#position-ids)
+        past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, *optional*):
+            Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
+            blocks) that can be used to speed up sequential decoding. This typically consists in the `past_key_values`
+            returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`.
+
+            Two formats are allowed:
+            - a [`~cache_utils.Cache`] instance;
+            - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
+            shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy
+            cache format.
+
+            The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the
+            legacy cache format will be returned.
+
+            If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't
+            have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids`
+            of shape `(batch_size, sequence_length)`.
+        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
+            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
+            model's internal embedding lookup matrix.
+        use_cache (`bool`, *optional*):
+            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
+            `past_key_values`).
+        output_attentions (`bool`, *optional*):
+            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+            tensors for more detail.
+        output_hidden_states (`bool`, *optional*):
+            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+            more detail.
+        return_dict (`bool`, *optional*):
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+
+@add_start_docstrings(
+    "The bare XdgMoE Model outputting raw hidden-states without any specific head on top.",
+    XdgMoE_START_DOCSTRING,
+)
+class XdgMoEModel(XdgMoEPreTrainedModel):
+    """
+    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`XdgMoEDecoderLayer`]
+
+    Args:
+        config: XdgMoEConfig
+    """
+
+    def __init__(self, config: XdgMoEConfig):
+        """Create the token embedding, the stack of decoder layers and the final RMSNorm from `config`."""
+        super().__init__(config)
+        self.padding_idx = config.pad_token_id
+        self.vocab_size = config.vocab_size
+
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
+        self.layers = nn.ModuleList(
+            [XdgMoEDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
+        )
+        # Remember the attention backend; forward() chooses the attention-mask format from these flags.
+        self._use_sdpa = config._attn_implementation == "sdpa"
+        self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
+        self.norm = XdgMoERMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+        self.gradient_checkpointing = False
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def get_input_embeddings(self):
+        """Return the token embedding module."""
+        return self.embed_tokens
+
+    def set_input_embeddings(self, value):
+        """Replace the token embedding module with `value`."""
+        self.embed_tokens = value
+
+    @add_start_docstrings_to_model_forward(XdgMoE_INPUTS_DOCSTRING)
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutputWithPast]:
+        # Embeds the inputs (unless `inputs_embeds` is given), runs every decoder layer and applies
+        # the final norm. Returns a `BaseModelOutputWithPast` when `return_dict` resolves to True,
+        # otherwise a tuple of the non-None entries of
+        # (hidden_states, next_cache, all_hidden_states, all_self_attns).
+        # Raises ValueError when both or neither of `input_ids` / `inputs_embeds` are supplied.
+        # (Written as comments because the decorator above assembles this method's docstring.)
+
+        # Per-call arguments take precedence; fall back to the config defaults when they are None.
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        # retrieve input_ids and inputs_embeds
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
+        elif input_ids is not None:
+            batch_size, seq_length = input_ids.shape[:2]
+        elif inputs_embeds is not None:
+            batch_size, seq_length = inputs_embeds.shape[:2]
+        else:
+            raise ValueError("You have to specify either input_ids or inputs_embeds")
+
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
+        past_key_values_length = 0
+        # Only meaningful when use_cache is True; defined up front so it is always bound.
+        use_legacy_cache = False
+        if use_cache:
+            # Anything that is not a `Cache` instance (including None) is treated as the legacy
+            # tuple format and converted; the output is converted back at the end.
+            use_legacy_cache = not isinstance(past_key_values, Cache)
+            if use_legacy_cache:
+                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+            past_key_values_length = past_key_values.get_usable_length(seq_length)
+
+        if position_ids is None:
+            # Default positions continue right after the cached tokens.
+            device = input_ids.device if input_ids is not None else inputs_embeds.device
+            position_ids = torch.arange(
+                past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
+            )
+            position_ids = position_ids.unsqueeze(0)
+
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+
+        if self._use_flash_attention_2:
+            # 2d mask is passed through the layers; it is dropped entirely when it masks nothing.
+            attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
+        elif self._use_sdpa and not output_attentions:
+            # output_attentions=True can not be supported when using SDPA, and we fall back on
+            # the manual implementation that requires a 4D causal mask in all cases.
+            attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
+                attention_mask,
+                (batch_size, seq_length),
+                inputs_embeds,
+                past_key_values_length,
+            )
+        else:
+            # 4d mask is passed through the layers
+            attention_mask = _prepare_4d_causal_attention_mask(
+                attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
+            )
+
+        # embed positions
+        hidden_states = inputs_embeds
+
+        # decoder layers
+        all_hidden_states = () if output_hidden_states else None
+        all_self_attns = () if output_attentions else None
+        next_decoder_cache = None
+
+        for decoder_layer in self.layers:
+            # Collect the input of each layer; the output of the last layer is appended after the norm.
+            if output_hidden_states:
+                all_hidden_states += (hidden_states,)
+
+            if self.gradient_checkpointing and self.training:
+                layer_outputs = self._gradient_checkpointing_func(
+                    decoder_layer.__call__,
+                    hidden_states,
+                    attention_mask,
+                    position_ids,
+                    past_key_values,
+                    output_attentions,
+                    use_cache,
+                )
+            else:
+                layer_outputs = decoder_layer(
+                    hidden_states,
+                    attention_mask=attention_mask,
+                    position_ids=position_ids,
+                    past_key_value=past_key_values,
+                    output_attentions=output_attentions,
+                    use_cache=use_cache,
+                )
+
+            hidden_states = layer_outputs[0]
+
+            if use_cache:
+                # The cache sits after the attention weights when those are returned as well.
+                next_decoder_cache = layer_outputs[2 if output_attentions else 1]
+
+            if output_attentions:
+                all_self_attns += (layer_outputs[1],)
+
+        hidden_states = self.norm(hidden_states)
+
+        # add hidden states from the last decoder layer
+        if output_hidden_states:
+            all_hidden_states += (hidden_states,)
+
+        next_cache = None
+        if use_cache:
+            # Hand back the same cache format the caller supplied.
+            next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache
+        if not return_dict:
+            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+        return BaseModelOutputWithPast(
+            last_hidden_state=hidden_states,
+            past_key_values=next_cache,
+            hidden_states=all_hidden_states,
+            attentions=all_self_attns,
+        )
+
+
+class XdgMoEForCausalLM(XdgMoEPreTrainedModel):
+    """XdgMoE decoder (`XdgMoEModel`) with a bias-free linear language-modeling head on top."""
+
+    _tied_weights_keys = ["lm_head.weight"]
+
+    def __init__(self, config):
+        """Build the decoder and the `hidden_size -> vocab_size` output projection from `config`."""
+        super().__init__(config)
+        self.model = XdgMoEModel(config)
+        self.vocab_size = config.vocab_size
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def get_input_embeddings(self):
+        """Return the decoder's token embedding module."""
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        """Replace the decoder's token embedding module with `value`."""
+        self.model.embed_tokens = value
+
+    def get_output_embeddings(self):
+        """Return the language-modeling head."""
+        return self.lm_head
+
+    def set_output_embeddings(self, new_embeddings):
+        """Replace the language-modeling head with `new_embeddings`."""
+        self.lm_head = new_embeddings
+
+    def set_decoder(self, decoder):
+        """Replace the underlying decoder with `decoder`."""
+        self.model = decoder
+
+    def get_decoder(self):
+        """Return the underlying decoder."""
+        return self.model
+
+    @add_start_docstrings_to_model_forward(XdgMoE_INPUTS_DOCSTRING)
+    @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        loss_mask: Optional[torch.Tensor] = None,
+    ) -> Union[Tuple, CausalLMOutputWithPast]:
+        r"""
+        Args:
+            labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+                config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+                The labels are compared with the logits position by position; no shift is applied inside this
+                method.
+            loss_mask (`torch.Tensor`, *optional*):
+                Per-token weights, flattened and multiplied with the per-token losses. When given, the loss is the
+                weighted sum divided by the sum of the mask; when omitted, the plain mean of the per-token losses
+                is used.
+
+        Returns:
+
+        Example:
+
+        ```python
+        >>> from transformers import AutoTokenizer, XdgMoEForCausalLM
+
+        >>> model = XdgMoEForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS)
+        >>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER)
+
+        >>> prompt = "Hey, are you conscious? Can you talk to me?"
+        >>> inputs = tokenizer(prompt, return_tensors="pt")
+
+        >>> # Generate
+        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
+        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
+        ```"""
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
+
+        outputs = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+
+        hidden_states = outputs[0]
+        if self.config.pretraining_tp > 1:
+            # Project with vocabulary slices of the head and concatenate the partial logits.
+            lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.config.pretraining_tp, dim=0)
+            logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.config.pretraining_tp)]
+            logits = torch.cat(logits, dim=-1)
+        else:
+            logits = self.lm_head(hidden_states)
+        logits = logits.float()
+
+        loss = None
+        if labels is not None:
+            # NOTE: unlike the usual causal-LM head, logits and labels are NOT shifted here
+            # (the shift below is deliberately commented out), so position i is scored against labels[i].
+            # shift_logits = logits[..., :-1, :].contiguous()
+            # shift_labels = labels[..., 1:].contiguous()
+            shift_logits = logits
+            shift_labels = labels
+            # Flatten the tokens
+            loss_fct = CrossEntropyLoss(reduction="none")
+            shift_logits = shift_logits.view(-1, self.config.vocab_size)
+            shift_labels = shift_labels.view(-1)
+            # Enable model parallelism
+            shift_labels = shift_labels.to(shift_logits.device)
+            losses = loss_fct(shift_logits, shift_labels)
+            if loss_mask is not None:
+                loss_mask = loss_mask.view(-1).float().to(losses.device)
+                # Clamp the denominator to the smallest positive normal float so that an all-zero
+                # mask produces a zero loss instead of NaN (0 / 0); any non-zero sum is unchanged.
+                mask_total = loss_mask.sum().clamp_min(torch.finfo(loss_mask.dtype).tiny)
+                loss = torch.sum(losses.view(-1) * loss_mask) / mask_total
+            else:
+                loss = losses.mean()
+
+        if not return_dict:
+            output = (logits,) + outputs[1:]
+            return (loss,) + output if loss is not None else output
+
+        return CausalLMOutputWithPast(
+            loss=loss,
+            logits=logits,
+            past_key_values=outputs.past_key_values,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+
+    def prepare_inputs_for_generation(
+        self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
+    ):
+        """Assemble the keyword arguments for one generation step.
+
+        Trims `input_ids` to the tokens not yet covered by `past_key_values`, crops the attention
+        mask when a bounded cache would overflow, and derives `position_ids` from the attention
+        mask when none are passed in `kwargs`.
+
+        Returns:
+            dict with `input_ids` (or `inputs_embeds` on the first step when they are provided),
+            `position_ids`, `past_key_values`, `use_cache` and `attention_mask`.
+        """
+        if past_key_values is not None:
+            if isinstance(past_key_values, Cache):
+                cache_length = past_key_values.get_seq_length()
+                past_length = past_key_values.seen_tokens
+                max_cache_length = past_key_values.get_max_length()
+            else:
+                # Legacy tuple cache: the length is read from dim 2 of the first layer's first tensor.
+                cache_length = past_length = past_key_values[0][0].shape[2]
+                max_cache_length = None
+
+            # Keep only the unprocessed tokens:
+            # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
+            # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
+            # input)
+            if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]:
+                input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
+            # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard
+            # input_ids based on the past_length.
+            elif past_length < input_ids.shape[1]:
+                input_ids = input_ids[:, past_length:]
+            # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens.
+
+            # If we are about to go beyond the maximum cache length, we need to crop the input attention mask.
+            if (
+                max_cache_length is not None
+                and attention_mask is not None
+                and cache_length + input_ids.shape[1] > max_cache_length
+            ):
+                attention_mask = attention_mask[:, -max_cache_length:]
+
+        position_ids = kwargs.get("position_ids", None)
+        if attention_mask is not None and position_ids is None:
+            # create position_ids on the fly for batch generation
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            # Masked positions get a dummy position of 1.
+            position_ids.masked_fill_(attention_mask == 0, 1)
+            if past_key_values:
+                position_ids = position_ids[:, -input_ids.shape[1] :]
+
+        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+        if inputs_embeds is not None and past_key_values is None:
+            model_inputs = {"inputs_embeds": inputs_embeds}
+        else:
+            model_inputs = {"input_ids": input_ids}
+
+        model_inputs.update(
+            {
+                "position_ids": position_ids,
+                "past_key_values": past_key_values,
+                "use_cache": kwargs.get("use_cache"),
+                "attention_mask": attention_mask,
+            }
+        )
+        return model_inputs
+
+    @staticmethod
+    def _reorder_cache(past_key_values, beam_idx):
+        """Reorder every cached tensor along dim 0 according to `beam_idx` and return the new tuple."""
+        reordered_past = ()
+        for layer_past in past_key_values:
+            reordered_past += (
+                tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
+            )
+        return reordered_past
+
+    def get_tokens_per_expert(self):
+        """Placeholder: currently does nothing and returns None."""
+        pass
+
+
+
+@add_start_docstrings(
+    """
+    The XdgMoE Model transformer with a sequence classification head on top (linear layer).
+
+    [`XdgMoEForSequenceClassification`] uses the last token in order to do the classification, as other causal models
+    (e.g. GPT-2) do.
+
+    Since it does classification on the last token, it requires to know the position of the last token. If a
+    `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If
+    no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the
+    padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in
+    each row of the batch).
+    """,
+    XdgMoE_START_DOCSTRING,
+)
+class XdgMoEForSequenceClassification(XdgMoEPreTrainedModel):
+    def __init__(self, config):
+        # Builds the decoder plus a bias-free linear head mapping hidden_size -> config.num_labels.
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.model = XdgMoEModel(config)
+        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
+
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def get_input_embeddings(self):
+        # Returns the decoder's token embedding module.
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        # Replaces the decoder's token embedding module.
+        self.model.embed_tokens = value
+
+    @add_start_docstrings_to_model_forward(XdgMoE_INPUTS_DOCSTRING)
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
+            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        transformer_outputs = self.model(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        hidden_states = transformer_outputs[0]
+        # Score every position; one position per row is selected below.
+        logits = self.score(hidden_states)
+
+        if input_ids is not None:
+            batch_size = input_ids.shape[0]
+        else:
+            batch_size = inputs_embeds.shape[0]
+
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
+        if self.config.pad_token_id is None:
+            # No pad token: use the last position of each row.
+            sequence_lengths = -1
+        else:
+            if input_ids is not None:
+                # Index of the first pad token minus one, i.e. the token just before the padding.
+                # For a row without any pad token argmax yields 0, so the index becomes -1 (last position).
+                # NOTE(review): this presumably assumes right-padded inputs; confirm with callers.
+                sequence_lengths = (torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1).to(
+                    logits.device
+                )
+            else:
+                # Padding cannot be detected from embeddings: use the last position of each row.
+                sequence_lengths = -1
+
+        pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]
+
+        loss = None
+        if labels is not None:
+            labels = labels.to(logits.device)
+            # Infer the problem type once from num_labels and the label dtype, and store it on the config.
+            if self.config.problem_type is None:
+                if self.num_labels == 1:
+                    self.config.problem_type = "regression"
+                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
+                    self.config.problem_type = "single_label_classification"
+                else:
+                    self.config.problem_type = "multi_label_classification"
+
+            if self.config.problem_type == "regression":
+                loss_fct = MSELoss()
+                if self.num_labels == 1:
+                    loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(pooled_logits, labels)
+            elif self.config.problem_type == "single_label_classification":
+                loss_fct = CrossEntropyLoss()
+                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
+            elif self.config.problem_type == "multi_label_classification":
+                loss_fct = BCEWithLogitsLoss()
+                loss = loss_fct(pooled_logits, labels)
+        if not return_dict:
+            # Tuple output: (loss,) is prepended only when a loss was computed.
+            output = (pooled_logits,) + transformer_outputs[1:]
+            return ((loss,) + output) if loss is not None else output
+
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=pooled_logits,
+            past_key_values=transformer_outputs.past_key_values,
+            hidden_states=transformer_outputs.hidden_states,
+            attentions=transformer_outputs.attentions,
+        )
+
+
+from transformers.modeling_outputs import TokenClassifierOutput
+
+class XdgMoEForTokenClassification(XdgMoEPreTrainedModel):
+    """XdgMoE decoder with dropout and a linear head applied to every token's hidden state."""
+
+    def __init__(self, config):
+        """Build the decoder, the dropout layer and the per-token scoring head from `config`.
+
+        The dropout probability is `config.classifier_dropout` if set, otherwise
+        `config.hidden_dropout` if set, otherwise 0.1.
+        """
+        super().__init__(config)
+        # Keep the attribute in sync with the width of the scoring head below
+        # (it was previously hard-coded to 1 while the head used config.num_labels).
+        self.num_labels = config.num_labels
+        self.model = XdgMoEModel(config)
+        if getattr(config, "classifier_dropout", None) is not None:
+            classifier_dropout = config.classifier_dropout
+        elif getattr(config, "hidden_dropout", None) is not None:
+            classifier_dropout = config.hidden_dropout
+        else:
+            classifier_dropout = 0.1
+        self.dropout = nn.Dropout(classifier_dropout)
+        self.score = nn.Linear(config.hidden_size, config.num_labels)
+
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def get_input_embeddings(self):
+        """Return the decoder's token embedding module."""
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        """Replace the decoder's token embedding module with `value`."""
+        self.model.embed_tokens = value
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[Cache] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+    ) -> TokenClassifierOutput:
+        r"""
+        Score every token of the input sequence.
+
+        labels (`torch.LongTensor`, *optional*):
+            Labels forwarded, together with the per-token logits and the config, to `self.loss_function`
+            (not defined in this class; presumably inherited -- verify). The expected shape is presumably
+            `(batch_size, sequence_length)`; confirm against the loss implementation.
+
+        Returns:
+            A `TokenClassifierOutput` holding the loss (None when `labels` is None), the per-token logits
+            and the decoder's optional hidden states / attentions.
+        """
+
+        outputs = self.model(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            # Attribute access below needs a ModelOutput; without this the decoder returns a plain
+            # tuple whenever config.use_return_dict is False.
+            return_dict=True,
+        )
+        sequence_output = outputs.last_hidden_state
+        sequence_output = self.dropout(sequence_output)
+        logits = self.score(sequence_output)
+
+        loss = None
+        if labels is not None:
+            loss = self.loss_function(logits, labels, self.config)
+
+        return TokenClassifierOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
\ No newline at end of file
diff --git a/recipe/moe/moe_trainer/ppo/__init__.py b/recipe/moe/moe_trainer/ppo/__init__.py
new file mode 100644
index 00000000000..1ce90c5eb35
--- /dev/null
+++ b/recipe/moe/moe_trainer/ppo/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/recipe/moe/moe_trainer/ppo/core_algos.py b/recipe/moe/moe_trainer/ppo/core_algos.py
new file mode 100644
index 00000000000..ac9344cddcb
--- /dev/null
+++ b/recipe/moe/moe_trainer/ppo/core_algos.py
@@ -0,0 +1,670 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Core functions to implement PPO algorithms.
+The functions implemented in this file should be used by trainers with different distributed strategies to
+implement PPO.
+"""
+
+from collections import defaultdict
+
+import numpy as np
+import torch
+
+import verl.utils.torch_functional as verl_F
+
+
+class AdaptiveKLController:
+    """
+    KL coefficient that adapts towards a target KL, following
+    https://arxiv.org/pdf/1909.08593.pdf
+
+    Attributes are plain values: `value` is the current coefficient, `target` the desired KL and
+    `horizon` the number of steps over which a full correction is spread.
+    """
+
+    def __init__(self, init_kl_coef, target_kl, horizon):
+        self.value, self.target, self.horizon = init_kl_coef, target_kl, horizon
+
+    def update(self, current_kl, n_steps):
+        """Scale `value` in place according to how far `current_kl` is from the target."""
+        # Relative deviation from the target, limited to +/-20% per update.
+        relative_error = current_kl / self.target - 1
+        clipped_error = np.clip(relative_error, -0.2, 0.2)
+        self.value *= 1 + clipped_error * n_steps / self.horizon
+
+
+class FixedKLController:
+    """KL controller whose coefficient never changes after construction."""
+
+    def __init__(self, kl_coef):
+        # The constant coefficient, exposed under the same attribute name as the adaptive controller.
+        self.value = kl_coef
+
+    def update(self, current_kl, n_steps):
+        """Accept the same arguments as the adaptive controller but leave `value` untouched."""
+        return None
+
+
+def get_kl_controller(kl_ctrl):
+    """Build the KL controller described by the `kl_ctrl` config object.
+
+    Args:
+        kl_ctrl: object exposing `type` ("fixed" or "adaptive") and `kl_coef`; the adaptive
+            variant additionally reads `target_kl` and `horizon`.
+
+    Returns:
+        A `FixedKLController` or an `AdaptiveKLController`.
+
+    Raises:
+        AssertionError: if the adaptive controller is requested with a non-positive horizon.
+        NotImplementedError: if `kl_ctrl.type` is neither "fixed" nor "adaptive".
+    """
+    if kl_ctrl.type == "fixed":
+        return FixedKLController(kl_coef=kl_ctrl.kl_coef)
+    if kl_ctrl.type == "adaptive":
+        assert kl_ctrl.horizon > 0, f"horizon must be larger than 0. Got {kl_ctrl.horizon}"
+        return AdaptiveKLController(init_kl_coef=kl_ctrl.kl_coef, target_kl=kl_ctrl.target_kl, horizon=kl_ctrl.horizon)
+    # Name the offending value so a misconfigured run is easy to diagnose.
+    raise NotImplementedError(f"Unsupported KL controller type: {kl_ctrl.type!r}")
+
+
+def compute_gae_advantage_return(
+    token_level_rewards: torch.Tensor,
+    values: torch.Tensor,
+    response_mask: torch.Tensor,
+    gamma: torch.Tensor,
+    lam: torch.Tensor,
+):
+    """Generalized Advantage Estimation (https://arxiv.org/abs/1506.02438).
+
+    Adapted from https://github.com/huggingface/trl/blob/main/trl/trainer/ppo_trainer.py
+
+    Args:
+        token_level_rewards: `(torch.Tensor)`
+            per-token rewards, shape (bs, response_length)
+        values: `(torch.Tensor)`
+            per-token value estimates, shape (bs, response_length)
+        response_mask: `(torch.Tensor)`
+            shape (bs, response_length). [EOS] mask: tokens after [EOS] have mask zero.
+            Only used when whitening the advantages.
+        gamma: `(float)`
+            discount factor used in RL
+        lam: `(float)`
+            lambda of the GAE recursion
+
+    Returns:
+        advantages: `(torch.Tensor)`
+            whitened advantages, shape (bs, response_length)
+        returns: `(torch.Tensor)`
+            un-whitened advantages plus values, shape (bs, response_length)
+    """
+    with torch.no_grad():
+        num_steps = token_level_rewards.shape[-1]
+        running_gae = 0
+        per_step_advantages = []
+
+        # Walk backwards through time so each step can reuse the accumulator of the step after it.
+        for step in range(num_steps - 1, -1, -1):
+            if step < num_steps - 1:
+                next_value = values[:, step + 1]
+            else:
+                # Nothing follows the last position, so its bootstrap value is zero.
+                next_value = 0.0
+            td_error = token_level_rewards[:, step] + gamma * next_value - values[:, step]
+            running_gae = td_error + gamma * lam * running_gae
+            per_step_advantages.append(running_gae)
+
+        # Entries were collected last-to-first; restore chronological order before stacking.
+        per_step_advantages.reverse()
+        advantages = torch.stack(per_step_advantages, dim=1)
+
+        # Returns are computed before whitening.
+        returns = advantages + values
+        advantages = verl_F.masked_whiten(advantages, response_mask)
+    return advantages, returns
+
+
+# NOTE(sgm): this implementation only consider outcome supervision, where the reward is a scalar.
+def compute_grpo_outcome_advantage(
+    token_level_rewards: torch.Tensor,
+    response_mask: torch.Tensor,
+    index: np.ndarray,
+    epsilon: float = 1e-6,
+    norm_adv_by_std_in_grpo: bool = True,
+):
+    """
+    Compute advantage for GRPO, operating only on Outcome reward
+    (with only one scalar reward for each response).
+
+    Args:
+        token_level_rewards: `(torch.Tensor)`
+            shape is (bs, response_length)
+        response_mask: `(torch.Tensor)`
+            shape is (bs, response_length)
+        index: `(np.ndarray)`
+            one hashable group id per sample (length bs); samples sharing an id are normalized together.
+        epsilon: `(float)`
+            added to the group std before dividing, to avoid division by zero.
+        norm_adv_by_std_in_grpo: (bool)
+            whether to scale the GRPO advantage.
+            If True, the advantage is scaled by the std, as in the original GRPO.
+            If False, the advantage is not scaled, as in Dr.GRPO (https://arxiv.org/abs/2503.20783).
+
+    Returns:
+        advantages: `(torch.Tensor)`
+            shape is (bs, response_length)
+        Returns: `(torch.Tensor)`
+            shape is (bs, response_length); the very same tensor object as `advantages`.
+    """
+    # One scalar score per response: the sum of its token-level rewards (a new tensor, so the
+    # in-place writes below never touch the caller's `token_level_rewards`).
+    scores = token_level_rewards.sum(dim=-1)
+
+    id2score = defaultdict(list)
+    id2mean = {}
+    id2std = {}
+
+    with torch.no_grad():
+        bsz = scores.shape[0]
+        for i in range(bsz):
+            id2score[index[i]].append(scores[i])
+        for idx, group_scores in id2score.items():
+            # Every key was created by an append above, so a group always has at least one score.
+            if len(group_scores) == 1:
+                # A single sample has no group statistics: leave its score unshifted and unscaled.
+                id2mean[idx] = torch.tensor(0.0)
+                id2std[idx] = torch.tensor(1.0)
+            else:
+                group = torch.tensor(group_scores)
+                id2mean[idx] = torch.mean(group)
+                id2std[idx] = torch.std(group)
+        for i in range(bsz):
+            if norm_adv_by_std_in_grpo:
+                scores[i] = (scores[i] - id2mean[index[i]]) / (id2std[index[i]] + epsilon)
+            else:
+                scores[i] = scores[i] - id2mean[index[i]]
+        # Broadcast the per-response advantage over its tokens and zero out masked positions.
+        scores = scores.unsqueeze(-1) * response_mask
+
+    return scores, scores
+
+
+def compute_grpo_passk_outcome_advantage(
+    token_level_rewards: torch.Tensor,
+    response_mask: torch.Tensor,
+    index: np.ndarray,
+    epsilon: float = 1e-6,
+    norm_adv_by_std_in_grpo: bool = True,
+):
+    """
+    Compute advantage for Pass@k using a GRPO-style outcome reward formulation.
+    Only the best response per group gets a non-zero advantage: r_max - r_second_max.
+
+    Implemented as described in https://arxiv.org/abs/2503.19595.
+
+    Args:
+        token_level_rewards: (bs, response_length)
+        response_mask: (bs, response_length)
+        index: (bs,) → group ID per sample
+        epsilon: float for numerical stability
+        norm_adv_by_std_in_grpo: if True, normalize advantage by std within group
+
+    Returns:
+        advantages: (bs, response_length)
+        returns: (bs, response_length)
+    """
+    scores = token_level_rewards.sum(dim=-1)  # (bs,)
+    advantages = torch.zeros_like(scores)
+
+    id2scores = defaultdict(list)
+    id2indices = defaultdict(list)
+
+    with torch.no_grad():
+        bsz = scores.shape[0]
+        for i in range(bsz):
+            idx = index[i]
+            id2scores[idx].append(scores[i])
+            id2indices[idx].append(i)
+
+        for idx in id2scores:
+            rewards = torch.stack(id2scores[idx])  # (k,)
+            if rewards.numel() < 2:
+                raise ValueError(f"Pass@k requires at least 2 samples per group. Got {rewards.numel()} for group {idx}.")
+            topk, topk_idx = torch.topk(rewards, 2)
+            r_max, r_second_max = topk[0], topk[1]
+            i_max = id2indices[idx][topk_idx[0].item()]
+            advantage = r_max - r_second_max
+            if norm_adv_by_std_in_grpo:
+                std = torch.std(rewards)
+                advantage = advantage / (std + epsilon)
+            advantages[i_max] = advantage
+
+    advantages = advantages.unsqueeze(-1) * response_mask
+    return advantages, advantages
+
+
+def compute_reinforce_plus_plus_baseline_outcome_advantage(token_level_rewards: torch.Tensor, response_mask: torch.Tensor, index: torch.Tensor, epsilon: float = 1e-6):
+    """
+    Compute advantage for RF++-baseline (https://arxiv.org/abs/2501.03262), operating only on Outcome reward
+    (with only one scalar reward for each response).
+
+    Args:
+        token_level_rewards: `(torch.Tensor)`
+            shape: (bs, response_length)
+        response_mask: `(torch.Tensor)`
+            shape: (bs, response_length)
+
+    Returns:
+        advantages: `(torch.Tensor)`
+            shape: (bs, response_length)
+        Returns: `(torch.Tensor)`
+            shape: (bs, response_length)
+    """
+    response_length = token_level_rewards.shape[-1]
+    scores = token_level_rewards.sum(dim=-1)
+
+    id2score = defaultdict(list)
+    id2mean = {}
+
+    with torch.no_grad():
+        bsz = scores.shape[0]
+        for i in range(bsz):
+            id2score[index[i]].append(scores[i])
+        for idx in id2score:
+            if len(id2score[idx]) == 1:
+                id2mean[idx] = torch.tensor(0.0)
+            elif len(id2score[idx]) > 1:
+                id2mean[idx] = torch.mean(torch.tensor(id2score[idx]))
+            else:
+                raise ValueError(f"no score in prompt index: {idx}")
+        for i in range(bsz):
+            scores[i] = scores[i] - id2mean[index[i]]
+
+        scores = scores.unsqueeze(-1).tile([1, response_length]) * response_mask
+        scores = verl_F.masked_whiten(scores, response_mask) * response_mask
+
+    return scores, scores
+
+
+def compute_rloo_outcome_advantage(token_level_rewards: torch.Tensor, response_mask: torch.Tensor, index: np.ndarray, epsilon: float = 1e-6):
+    """
+    Compute advantage for RLOO based on https://arxiv.org/abs/2402.14740
+
+    Args:
+        token_level_rewards: `(torch.Tensor)`
+            shape: (bs, response_length)
+        response_mask: `(torch.Tensor)`
+            shape: (bs, response_length)
+
+    Returns:
+        advantages: `(torch.Tensor)`
+            shape: (bs, response_length)
+        Returns: `(torch.Tensor)`
+            shape: (bs, response_length)
+    """
+    scores = token_level_rewards.sum(dim=-1)
+
+    id2score = defaultdict(list)
+    id2mean = {}
+
+    with torch.no_grad():
+        bsz = scores.shape[0]
+        for i in range(bsz):
+            id2score[index[i]].append(scores[i])
+        for idx in id2score:
+            if len(id2score[idx]) == 1:
+                id2mean[idx] = torch.tensor(0.0)
+            elif len(id2score[idx]) > 1:
+                id2mean[idx] = torch.mean(torch.tensor(id2score[idx]))
+            else:
+                raise ValueError(f"no score in prompt index: {idx}")
+        for i in range(bsz):
+            response_num = len(id2score[index[i]])
+            if response_num > 1:
+                scores[i] = scores[i] * response_num / (response_num - 1) - id2mean[index[i]] * response_num / (response_num - 1)
+        scores = scores.unsqueeze(-1) * response_mask
+
+    return scores, scores
+
+
+def compute_opo_outcome_advantage(token_level_rewards: torch.Tensor, response_mask: torch.Tensor, index: np.ndarray, epsilon: float = 1e-6):
+    """
+    Compute advantage for OPO based on https://arxiv.org/pdf/2505.23585
+
+    Args:
+        token_level_rewards: `(torch.Tensor)`
+            shape: (bs, response_length)
+        response_mask: `(torch.Tensor)`
+            shape: (bs, response_length)
+
+    Returns:
+        advantages: `(torch.Tensor)`
+            shape: (bs, response_length)
+        Returns: `(torch.Tensor)`
+            shape: (bs, response_length)
+    """
+    response_length = response_mask.sum(dim=-1)
+    scores = token_level_rewards.sum(dim=-1)
+
+    id2score = defaultdict(list)
+    id2len = defaultdict(list)
+    id2bsl = {}
+
+    with torch.no_grad():
+        bsz = scores.shape[0]
+        for i in range(bsz):
+            id2score[index[i]].append(scores[i])
+            id2len[index[i]].append(response_length[i])
+
+        for idx in id2score:
+            if len(id2score[idx]) == 1:
+                id2bsl[idx] = torch.tensor(0.0)
+            elif len(id2score[idx]) > 1:
+                score_tensor = torch.tensor(id2score[idx])
+                len_tensor = torch.tensor(id2len[idx])
+                id2bsl[idx] = (len_tensor * score_tensor).sum() / len_tensor.sum()
+            else:
+                raise ValueError(f"no score in prompt index: {idx}")
+        for i in range(bsz):
+            scores[i] = scores[i] - id2bsl[index[i]]
+        scores = scores.unsqueeze(-1) * response_mask
+
+    return scores, scores
+
+
+def compute_reinforce_plus_plus_outcome_advantage(token_level_rewards: torch.Tensor, response_mask: torch.Tensor, gamma: torch.Tensor):
+    """
+    Compute advantage for REINFORCE++.
+    This implementation is based on the paper: https://arxiv.org/abs/2501.03262
+
+    Args:
+        token_level_rewards: `(torch.Tensor)`
+            shape: (bs, response_length)
+        response_mask: `(torch.Tensor)`
+            shape: (bs, response_length)
+
+    Returns:
+        advantages: `(torch.Tensor)`
+            shape: (bs, response_length)
+        Returns: `(torch.Tensor)`
+            shape: (bs, response_length)
+    """
+
+    with torch.no_grad():
+        returns = torch.zeros_like(token_level_rewards)
+        running_return = 0
+
+        for t in reversed(range(token_level_rewards.shape[1])):
+            running_return = token_level_rewards[:, t] + gamma * running_return
+            returns[:, t] = running_return
+            # Reset after EOS
+            running_return = running_return * response_mask[:, t]
+
+        advantages = verl_F.masked_whiten(returns, response_mask)
+        advantages = advantages * response_mask
+
+    return advantages, returns
+
+
+def compute_remax_outcome_advantage(token_level_rewards: torch.Tensor, reward_baselines: torch.Tensor, response_mask: torch.Tensor):
+    """
+    Compute advantage for ReMax, operating only on Outcome reward
+    This implementation is based on the paper: https://arxiv.org/abs/2310.10505
+    (with only one scalar reward for each response).
+
+    Args:
+        token_level_rewards: `(torch.Tensor)`
+            shape: (bs, response_length)
+        reward_baselines: `(torch.Tensor)`
+            shape: (bs,)
+        response_mask: `(torch.Tensor)`
+            shape: (bs, response_length)
+
+    Returns:
+        advantages: `(torch.Tensor)`
+            shape: (bs, response_length)
+        Returns: `(torch.Tensor)`
+            shape: (bs, response_length)
+    """
+
+    with torch.no_grad():
+        returns = (token_level_rewards * response_mask).flip(dims=[-1]).cumsum(dim=-1).flip(dims=[-1])
+        advantages = returns - reward_baselines.unsqueeze(-1) * response_mask
+
+    return advantages, returns
+
+
+def compute_rewards(token_level_scores, old_log_prob, ref_log_prob, kl_ratio):
+    kl = old_log_prob - ref_log_prob
+    return token_level_scores - kl * kl_ratio
+
+
+def agg_loss(loss_mat: torch.Tensor, loss_mask: torch.Tensor, loss_agg_mode: str):
+    """
+    Aggregate the loss matrix into a scalar.
+
+    Args:
+        loss_mat: `(torch.Tensor)`:
+            shape: (bs, response_length)
+        loss_mask: `(torch.Tensor)`:
+            shape: (bs, response_length)
+        loss_agg_mode: (str) choices:
+            method to aggregate the loss matrix into a scalar.
+    Returns:
+        loss: `a scalar torch.Tensor`
+            aggregated loss
+    """
+    if loss_agg_mode == "token-mean":
+        loss = verl_F.masked_mean(loss_mat, loss_mask)
+    elif loss_agg_mode == "seq-mean-token-sum":
+        seq_losses = torch.sum(loss_mat * loss_mask, dim=-1)  # token-sum
+        loss = torch.mean(seq_losses)  # seq-mean
+    elif loss_agg_mode == "seq-mean-token-mean":
+        seq_losses = torch.sum(loss_mat * loss_mask, dim=-1) / torch.sum(loss_mask, dim=-1)  # token-mean
+        loss = torch.mean(seq_losses)  # seq-mean
+    elif loss_agg_mode == "seq-mean-token-sum-norm":
+        seq_losses = torch.sum(loss_mat * loss_mask, dim=-1)
+        loss = torch.sum(seq_losses) / loss_mask.shape[-1]  # The divisor
+        # (loss_mask.shape[-1]) should ideally be constant
+        # throughout training to well-replicate the DrGRPO paper.
+        # TODO: Perhaps add user-defined normalizer argument to
+        # agg_loss to ensure divisor stays constant throughout.
+    else:
+        raise ValueError(f"Invalid loss_agg_mode: {loss_agg_mode}")
+
+    return loss
+
+
+def compute_policy_loss(
+    old_log_prob,
+    log_prob,
+    advantages,
+    response_mask,
+    cliprange=None,
+    cliprange_low=None,
+    cliprange_high=None,
+    clip_ratio_c=3.0,
+    loss_agg_mode: str = "token-mean",
+):
+    """
+    Compute the clipped policy objective and related metrics for PPO.
+
+    Adapted from
+    https://github.com/huggingface/trl/blob/main/trl/trainer/ppo_trainer.py#L1122
+
+    Args:
+        old_log_prob (torch.Tensor):
+            Log-probabilities of actions under the old policy, shape (batch_size, response_length).
+        log_prob (torch.Tensor):
+            Log-probabilities of actions under the current policy, shape (batch_size, response_length).
+        advantages (torch.Tensor):
+            Advantage estimates for each action, shape (batch_size, response_length).
+        response_mask (torch.Tensor):
+            Mask indicating which tokens to include in the loss, shape (batch_size, response_length).
+        cliprange (float, optional):
+            Clipping parameter ε for standard PPO. See https://arxiv.org/abs/1707.06347.
+            Defaults to None (must be provided).
+        cliprange_low (float, optional):
+            Lower clip range for dual-clip PPO. Defaults to same as `cliprange`.
+        cliprange_high (float, optional):
+            Upper clip range for dual-clip PPO. Defaults to same as `cliprange`.
+        clip_ratio_c (float, optional):
+            Lower bound of the ratio for dual-clip PPO. See https://arxiv.org/pdf/1912.09729.
+            Defaults to 3.0.
+        loss_agg_mode (str, optional):
+            Aggregation mode for `agg_loss`. Defaults to "token-mean".
+    """
+    assert clip_ratio_c > 1.0, "The lower bound of the clip_ratio_c for dual-clip PPO should be greater than 1.0," + f" but get the value: {clip_ratio_c}."
+
+    negative_approx_kl = log_prob - old_log_prob
+    ratio = torch.exp(negative_approx_kl)
+    ppo_kl = verl_F.masked_mean(-negative_approx_kl, response_mask)
+
+    pg_losses1 = -advantages * ratio
+    if cliprange_low is None:
+        cliprange_low = cliprange
+    if cliprange_high is None:
+        cliprange_high = cliprange
+    pg_losses2 = -advantages * torch.clamp(ratio, 1 - cliprange_low, 1 + cliprange_high)  # - clip(ratio, 1-cliprange, 1+cliprange) * A
+    clip_pg_losses1 = torch.maximum(pg_losses1, pg_losses2)  # max(-ratio * A, -clip(ratio, 1-cliprange, 1+cliprange) * A)
+    pg_clipfrac = verl_F.masked_mean(torch.gt(pg_losses2, pg_losses1).float(), response_mask)
+
+    pg_losses3 = -advantages * clip_ratio_c
+    clip_pg_losses2 = torch.min(pg_losses3, clip_pg_losses1)
+    pg_clipfrac_lower = verl_F.masked_mean(torch.gt(clip_pg_losses1, pg_losses3) * (advantages < 0).float(), response_mask)
+
+    pg_losses = torch.where(advantages < 0, clip_pg_losses2, clip_pg_losses1)
+    pg_loss = agg_loss(loss_mat=pg_losses, loss_mask=response_mask, loss_agg_mode=loss_agg_mode)
+
+    return pg_loss, pg_clipfrac, ppo_kl, pg_clipfrac_lower
+
+
+def compute_entropy_loss(logits, response_mask, loss_agg_mode: str = "token-mean"):
+    """Compute categorical entropy loss (For backward compatibility)
+
+    Args:
+        logits (torch.Tensor): shape is (bs, response_length, vocab_size)
+        response_mask (torch.Tensor): shape is (bs, response_length)
+
+    Returns:
+        entropy: a scalar torch.Tensor
+
+    """
+    # compute entropy
+    token_entropy = verl_F.entropy_from_logits(logits)  # (bs, response_len)
+    entropy_loss = agg_loss(loss_mat=token_entropy, loss_mask=response_mask, loss_agg_mode=loss_agg_mode)
+    return entropy_loss
+
+
+def compute_value_loss(vpreds: torch.Tensor, returns: torch.Tensor, values: torch.Tensor, response_mask: torch.Tensor, cliprange_value: float, loss_agg_mode: str = "token-mean"):
+    """
+    Compute the clipped value-function loss for PPO.
+
+    Copied from https://github.com/huggingface/trl/blob/main/trl/trainer/ppo_trainer.py#L1151
+
+    Args:
+        vpreds (torch.FloatTensor):
+            Predicted values from the value head, shape (batch_size, response_length).
+        values (torch.FloatTensor):
+            Old (baseline) values from the value head, shape (batch_size, response_length).
+        returns (torch.FloatTensor):
+            Ground-truth returns, shape (batch_size, response_length).
+        response_mask (torch.Tensor):
+            Mask indicating which tokens to include in the value loss calculation.
+        cliprange_value (float):
+            Clip range for value prediction updates.
+        loss_agg_mode (str, optional):
+            Aggregation mode for `agg_loss`. Defaults to "token-mean".
+
+    Returns:
+        vf_loss (torch.FloatTensor):
+            A scalar tensor containing the aggregated value-function loss.
+        vf_clipfrac (float):
+            Fraction of elements where the clipped loss was used.
+    """
+    vpredclipped = verl_F.clip_by_value(vpreds, values - cliprange_value, values + cliprange_value)
+    vf_losses1 = (vpreds - returns) ** 2
+    vf_losses2 = (vpredclipped - returns) ** 2
+    clipped_vf_losses = torch.max(vf_losses1, vf_losses2)
+    vf_loss = agg_loss(loss_mat=clipped_vf_losses, loss_mask=response_mask, loss_agg_mode=loss_agg_mode)
+    vf_clipfrac = verl_F.masked_mean(torch.gt(vf_losses2, vf_losses1).float(), response_mask)
+    return vf_loss, vf_clipfrac
+
+
+def kl_penalty(logprob: torch.FloatTensor, ref_logprob: torch.FloatTensor, kl_penalty) -> torch.FloatTensor:
+    """Compute KL divergence given logprob and ref_logprob.
+    Copied from https://github.com/huggingface/trl/blob/main/trl/trainer/ppo_trainer.py#L1104
+    See more description in http://joschu.net/blog/kl-approx.html
+
+    Args:
+        logprob:
+        ref_logprob:
+
+    Returns:
+
+    """
+    if kl_penalty in ("kl", "k1"):
+        return logprob - ref_logprob
+
+    if kl_penalty == "abs":
+        return (logprob - ref_logprob).abs()
+
+    if kl_penalty in ("mse", "k2"):
+        return 0.5 * (logprob - ref_logprob).square()
+
+    # J. Schulman. Approximating kl divergence, 2020.
+    # # URL http://joschu.net/blog/kl-approx.html.
+    if kl_penalty in ("low_var_kl", "k3"):
+        kl = ref_logprob - logprob
+        ratio = torch.exp(kl)
+        kld = (ratio - kl - 1).contiguous()
+        return torch.clamp(kld, min=-10, max=10)
+
+    if kl_penalty == "full":
+        # so, here logprob and ref_logprob should contain the logits for every token in vocabulary
+        raise NotImplementedError
+
+    raise NotImplementedError
+
+
+def compute_pf_ppo_reweight_data(
+    data,
+    reweight_method: str = "pow",
+    weight_pow: float = 2.0,
+):
+    """Reweight the data based on the token_level_scores.
+
+    Args:
+        data: DataProto object, containing batch, non_tensor_batch and meta_info
+        reweight_method: str, choices: "pow", "max_min", "max_random"
+        weight_pow: float, the power of the weight
+
+    Returns:
+
+    """
+
+    @torch.no_grad()
+    def compute_weights(scores: torch.Tensor, reweight_method: str, weight_pow: float) -> torch.Tensor:
+        if reweight_method == "pow":
+            weights = torch.pow(torch.abs(scores), weight_pow)
+        elif reweight_method == "max_min":
+            max_score = torch.max(scores)
+            min_score = torch.min(scores)
+            weights = torch.where((scores == max_score) | (scores == min_score), 1.0, 0.0)
+        elif reweight_method == "max_random":
+            max_score = torch.max(scores)
+            weights = torch.where(scores == max_score, 0.4, 0.1)
+        else:
+            raise ValueError(f"Unsupported reweight_method: {reweight_method}")
+        return weights
+
+    scores = data.batch["token_level_scores"].sum(dim=-1)
+    weights = compute_weights(scores, reweight_method, weight_pow)
+    weights = torch.clamp(weights + 1e-8, min=1e-8)
+
+    batch_size = scores.shape[0]
+    sample_indices = torch.multinomial(weights, batch_size, replacement=True)
+
+    resampled_batch = {key: tensor[sample_indices] for key, tensor in data.batch.items()}
+
+    sample_indices_np = sample_indices.numpy()
+    resampled_non_tensor_batch = {}
+    for key, array in data.non_tensor_batch.items():
+        if isinstance(array, np.ndarray):
+            resampled_non_tensor_batch[key] = array[sample_indices_np]
+        else:
+            resampled_non_tensor_batch[key] = [array[i] for i in sample_indices_np]
+
+    resampled_meta_info = {}
+    for key, value in data.meta_info.items():
+        if isinstance(value, list) and len(value) == batch_size:
+            resampled_meta_info[key] = [value[i] for i in sample_indices_np]
+        else:
+            resampled_meta_info[key] = value
+
+    from copy import deepcopy
+
+    resampled_data = deepcopy(data)
+    resampled_data.batch = type(data.batch)(resampled_batch)
+    resampled_data.batch.batch_size = data.batch.batch_size
+    resampled_data.non_tensor_batch = resampled_non_tensor_batch
+    resampled_data.meta_info = resampled_meta_info
+
+    return resampled_data
diff --git a/recipe/moe/moe_trainer/ppo/metric_utils.py b/recipe/moe/moe_trainer/ppo/metric_utils.py
new file mode 100644
index 00000000000..6ff42d7e02a
--- /dev/null
+++ b/recipe/moe/moe_trainer/ppo/metric_utils.py
@@ -0,0 +1,426 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Metrics related to the PPO trainer.
+"""
+
+from collections import defaultdict
+from functools import partial
+from typing import Any, Callable, Dict, List
+
+import numpy as np
+import torch
+
+from verl import DataProto
+from verl.utils.import_utils import deprecated
+
+
+@deprecated("verl.utils.metric.reduce_metrics")
+def reduce_metrics(metrics: Dict[str, List[Any]]) -> Dict[str, Any]:
+    """
+    Reduces a dictionary of metric lists by computing the mean of each list.
+
+    Args:
+        metrics: A dictionary mapping metric names to lists of metric values.
+
+    Returns:
+        A dictionary with the same keys but with each list replaced by its mean value.
+
+    Example:
+        >>> metrics = {"loss": [1.0, 2.0, 3.0], "accuracy": [0.8, 0.9, 0.7]}
+        >>> reduce_metrics(metrics)
+        {"loss": 2.0, "accuracy": 0.8}
+    """
+    from verl.utils.metric import reduce_metrics
+
+    return reduce_metrics(metrics)
+
+
+def _compute_response_info(batch: DataProto) -> Dict[str, Any]:
+    """
+    Computes information about prompts and responses from a batch.
+    
+    This is an internal helper function that extracts masks and lengths for prompts and responses.
+    
+    Args:
+        batch: A DataProto object containing batch data with responses and attention masks.
+        
+    Returns:
+        A dictionary containing:
+            - response_mask: Attention mask for the response tokens
+            - prompt_length: Tensor of prompt lengths for each item in the batch
+            - response_length: Tensor of response lengths for each item in the batch
+    """
+    response_length = batch.batch["responses"].shape[-1]
+
+    prompt_mask = batch.batch["attention_mask"][:, :-response_length]
+    response_mask = batch.batch["attention_mask"][:, -response_length:]
+
+    prompt_length = prompt_mask.sum(-1).float()
+    response_length = response_mask.sum(-1).float()  # (batch_size,)
+
+    return dict(
+        response_mask=response_mask,
+        prompt_length=prompt_length,
+        response_length=response_length,
+    )
+
+
+def compute_data_metrics(batch: DataProto, use_critic: bool = True) -> Dict[str, Any]:
+    """
+    Computes various metrics from a batch of data for PPO training.
+
+    This function calculates metrics related to scores, rewards, advantages, returns, values,
+    and sequence lengths from a batch of data. It provides statistical information (mean, max, min)
+    for each metric category.
+
+    Args:
+        batch: A DataProto object containing batch data with token-level scores, rewards, advantages, etc.
+        use_critic: Whether to include critic-specific metrics. Defaults to True.
+
+    Returns:
+        A dictionary of metrics including:
+            - critic/score/mean, max, min: Statistics about sequence scores
+            - critic/rewards/mean, max, min: Statistics about sequence rewards
+            - critic/advantages/mean, max, min: Statistics about advantages
+            - critic/returns/mean, max, min: Statistics about returns
+            - critic/values/mean, max, min: Statistics about critic values (if use_critic=True)
+            - critic/vf_explained_var: Explained variance of the value function (if use_critic=True)
+            - response_length/mean, max, min, clip_ratio: Statistics about response lengths
+            - prompt_length/mean, max, min, clip_ratio: Statistics about prompt lengths
+    """
+    sequence_score = batch.batch["token_level_scores"].sum(-1)
+    sequence_reward = batch.batch["token_level_rewards"].sum(-1)
+
+    advantages = batch.batch["advantages"]
+    returns = batch.batch["returns"]
+
+    max_response_length = batch.batch["responses"].shape[-1]
+
+    prompt_mask = batch.batch["attention_mask"][:, :-max_response_length].bool()
+    response_mask = batch.batch["attention_mask"][:, -max_response_length:].bool()
+
+    max_prompt_length = prompt_mask.size(-1)
+
+    response_info = _compute_response_info(batch)
+    prompt_length = response_info["prompt_length"]
+    response_length = response_info["response_length"]
+
+    valid_adv = torch.masked_select(advantages, response_mask)
+    valid_returns = torch.masked_select(returns, response_mask)
+
+    if use_critic:
+        values = batch.batch["values"]
+        valid_values = torch.masked_select(values, response_mask)
+        return_diff_var = torch.var(valid_returns - valid_values)
+        return_var = torch.var(valid_returns)
+
+    metrics = {
+        # score
+        "critic/score/mean": torch.mean(sequence_score).detach().item(),
+        "critic/score/max": torch.max(sequence_score).detach().item(),
+        "critic/score/min": torch.min(sequence_score).detach().item(),
+        # reward
+        "critic/rewards/mean": torch.mean(sequence_reward).detach().item(),
+        "critic/rewards/max": torch.max(sequence_reward).detach().item(),
+        "critic/rewards/min": torch.min(sequence_reward).detach().item(),
+        # adv
+        "critic/advantages/mean": torch.mean(valid_adv).detach().item(),
+        "critic/advantages/max": torch.max(valid_adv).detach().item(),
+        "critic/advantages/min": torch.min(valid_adv).detach().item(),
+        # returns
+        "critic/returns/mean": torch.mean(valid_returns).detach().item(),
+        "critic/returns/max": torch.max(valid_returns).detach().item(),
+        "critic/returns/min": torch.min(valid_returns).detach().item(),
+        **(
+            {
+                # values
+                "critic/values/mean": torch.mean(valid_values).detach().item(),
+                "critic/values/max": torch.max(valid_values).detach().item(),
+                "critic/values/min": torch.min(valid_values).detach().item(),
+                # vf explained var
+                "critic/vf_explained_var": (1.0 - return_diff_var / (return_var + 1e-5)).detach().item(),
+            }
+            if use_critic
+            else {}
+        ),
+        # response length
+        "response_length/mean": torch.mean(response_length).detach().item(),
+        "response_length/max": torch.max(response_length).detach().item(),
+        "response_length/min": torch.min(response_length).detach().item(),
+        "response_length/clip_ratio": torch.mean(torch.eq(response_length, max_response_length).float()).detach().item(),
+        # prompt length
+        "prompt_length/mean": torch.mean(prompt_length).detach().item(),
+        "prompt_length/max": torch.max(prompt_length).detach().item(),
+        "prompt_length/min": torch.min(prompt_length).detach().item(),
+        "prompt_length/clip_ratio": torch.mean(torch.eq(prompt_length, max_prompt_length).float()).detach().item(),
+    }
+    return metrics
+
+
+def compute_timing_metrics(batch: DataProto, timing_raw: Dict[str, float]) -> Dict[str, Any]:
+    """
+    Computes timing metrics for different processing stages in PPO training.
+    
+    This function calculates both raw timing metrics (in seconds) and per-token timing metrics 
+    (in milliseconds) for various processing stages like generation, reference computation, 
+    value computation, advantage computation, and model updates.
+
+    Args:
+        batch: A DataProto object containing batch data with responses and attention masks.
+        timing_raw: A dictionary mapping stage names to their execution times in seconds.
+
+    Returns:
+        A dictionary containing:
+            - timing_s/{name}: Raw timing in seconds for each stage
+            - timing_per_token_ms/{name}: Per-token timing in milliseconds for each stage
+
+    Note:
+        Different stages use different token counts for normalization:
+        - "gen" uses only response tokens
+        - Other stages ("ref", "values", "adv", "update_critic", "update_actor") use all tokens
+          (prompt + response)
+    """
+    response_info = _compute_response_info(batch)
+    num_prompt_tokens = torch.sum(response_info["prompt_length"]).item()
+    num_response_tokens = torch.sum(response_info["response_length"]).item()
+    num_overall_tokens = num_prompt_tokens + num_response_tokens
+
+    num_tokens_of_section = {
+        "gen": num_response_tokens,
+        **{name: num_overall_tokens for name in ["ref", "values", "adv", "update_critic", "update_actor"]},
+    }
+
+    return {
+        **{f"timing_s/{name}": value for name, value in timing_raw.items()},
+        **{f"timing_per_token_ms/{name}": timing_raw[name] * 1000 / num_tokens_of_section[name] for name in set(num_tokens_of_section.keys()) & set(timing_raw.keys())},
+    }
+
+
+def compute_throughout_metrics(batch: DataProto, timing_raw: Dict[str, float], n_gpus: int) -> Dict[str, Any]:
+    """
+    Report per-step throughput figures for a PPO training batch.
+
+    The token total is read from ``batch.meta_info["global_token_num"]`` and the
+    wall-clock duration from ``timing_raw["step"]``; throughput is the token total
+    divided by the product of that duration and ``n_gpus``.
+
+    Args:
+        batch: DataProto whose ``meta_info["global_token_num"]`` is an iterable of
+            token counts that are summed here.
+        timing_raw: Stage name -> elapsed seconds. A "step" entry is required.
+        n_gpus: Number of GPUs the throughput is normalized by.
+
+    Returns:
+        A dictionary with three entries:
+            - perf/total_num_tokens: sum of the batch's global token counts
+            - perf/time_per_step: the "step" duration taken from ``timing_raw``
+            - perf/throughput: tokens per second per GPU
+
+    Raises:
+        KeyError: if ``timing_raw`` has no "step" entry.
+        ZeroDivisionError: if the step time or ``n_gpus`` is zero (plain Python numbers).
+    """
+    token_total = sum(batch.meta_info["global_token_num"])
+    step_seconds = timing_raw["step"]
+    # FLOPs reporting is not wired up yet; a flops estimator could add
+    # actual / theoretical TFLOPs/s/GPU entries alongside the ones below.
+    gpu_seconds = step_seconds * n_gpus
+    return {
+        "perf/total_num_tokens": token_total,
+        "perf/time_per_step": step_seconds,
+        "perf/throughput": token_total / gpu_seconds,
+    }
+
+
+def bootstrap_metric(
+    data: list[Any],
+    subset_size: int,
+    reduce_fns: list[Callable[[np.ndarray], float]],
+    n_bootstrap: int = 1000,
+    seed: int = 42,
+) -> list[tuple[float, float]]:
+    """
+    Performs bootstrap resampling to estimate statistics of metrics.
+
+    Draws ``n_bootstrap`` samples of ``subset_size`` items (with replacement) from ``data``,
+    applies every reduction function to each sample, and summarizes the results.
+
+    Args:
+        data: List of data points to bootstrap from.
+        subset_size: Size of each bootstrap sample.
+        reduce_fns: Functions that compute a metric from a resampled list of data points.
+        n_bootstrap: Number of bootstrap iterations. Defaults to 1000.
+        seed: Seed of the private random generator used for sampling. Defaults to 42.
+
+    Returns:
+        A list of tuples, where each tuple contains (mean, std) for a metric
+        corresponding to each reduction function in reduce_fns.
+
+    Example:
+        >>> data = [1, 2, 3, 4, 5]
+        >>> reduce_fns = [np.mean, np.max]
+        >>> bootstrap_metric(data, 3, reduce_fns)
+        [(3.0, 0.5), (4.5, 0.3)]  # Example values
+    """
+    # A private RandomState yields the same draws as np.random.seed(seed) would, without reseeding NumPy's global RNG.
+    rng = np.random.RandomState(seed)
+    bootstrap_metric_lsts = [[] for _ in range(len(reduce_fns))]
+    for _ in range(n_bootstrap):
+        bootstrap_idxs = rng.choice(len(data), size=subset_size, replace=True)
+        bootstrap_data = [data[i] for i in bootstrap_idxs]
+        for i, reduce_fn in enumerate(reduce_fns):
+            bootstrap_metric_lsts[i].append(reduce_fn(bootstrap_data))
+    return [(np.mean(lst), np.std(lst)) for lst in bootstrap_metric_lsts]
+
+
+def calc_maj_val(data: list[dict[str, Any]], vote_key: str, val_key: str) -> float:
+    """
+    Return the value attached to the majority vote.
+
+    Records are grouped by ``record[vote_key]``; the group with the most records wins and the
+    ``val_key`` entry of the first record in that group is returned.
+
+    Args:
+        data: Records to vote over; every record must provide both vote_key and val_key.
+        vote_key: Key whose value is the vote being counted.
+        val_key: Key whose value is returned for the winning vote.
+
+    Returns:
+        The ``val_key`` entry of the earliest record carrying the most frequent vote (ties go to the vote seen first).
+
+    Example:
+        >>> data = [
+        ...     {"pred": "A", "val": 0.9},
+        ...     {"pred": "B", "val": 0.8},
+        ...     {"pred": "A", "val": 0.7}
+        ... ]
+        >>> calc_maj_val(data, vote_key="pred", val_key="val")
+        0.9
+    """
+    tally = {}
+    first_val_of = {}
+    for record in data:
+        vote, val = record[vote_key], record[val_key]
+        tally[vote] = tally.get(vote, 0) + 1
+        # only the first value seen for a vote is ever returned
+        first_val_of.setdefault(vote, val)
+
+    winner = max(tally, key=tally.get)
+    return first_val_of[winner]
+
+
+def process_validation_metrics(data_sources: list[str], sample_inputs: list[str], infos_dict: dict[str, list[Any]], seed: int = 42) -> dict[str, dict[str, dict[str, float]]]:
+    """
+    Process validation metrics into a structured format with statistical analysis.
+
+    Samples are grouped by data source and prompt. For every variable of a prompt whose first value is
+    not a string, the mean is computed and, when the prompt has more than one response, also the standard
+    deviation and bootstrap estimates of the best/worst/majority-vote value for subset sizes 2, 4, 8, ...
+    (below N) and N. The per-prompt metrics are finally averaged over the prompts of each data source.
+
+    Args:
+        data_sources: List of data source identifiers for each sample.
+        sample_inputs: List of input prompts corresponding to each sample.
+        infos_dict: Dictionary mapping variable names to lists of values for each sample.
+        seed: Random seed for bootstrap sampling. Defaults to 42.
+
+    Returns:
+        A nested dictionary with the structure:
+        {
+            data_source: {
+                variable_name: {
+                    metric_name: value
+                }
+            }
+        }
+
+        Where metric_name includes (N is a prompt's number of responses or a bootstrap subset size):
+        - "mean@N": Mean value across N samples
+        - "std@N": Standard deviation across N samples
+        - "best@N/mean": Mean of the best values in bootstrap samples of size N
+        - "best@N/std": Standard deviation of the best values in bootstrap samples
+        - "worst@N/mean": Mean of the worst values in bootstrap samples
+        - "worst@N/std": Standard deviation of the worst values in bootstrap samples
+        - "maj@N/mean": Mean of majority voting results in bootstrap samples (if "pred" exists)
+        - "maj@N/std": Standard deviation of majority voting results (if "pred" exists)
+
+    Example:
+        >>> data_sources = ["source1", "source1", "source2"]
+        >>> sample_inputs = ["prompt1", "prompt1", "prompt2"]
+        >>> infos_dict = {"score": [0.8, 0.9, 0.7], "pred": ["A", "A", "B"]}
+        >>> result = process_validation_metrics(data_sources, sample_inputs, infos_dict)
+        >>> # result will contain statistics for each data source and variable
+    """
+    # Group metrics by data source, prompt and variable
+    data_src2prompt2var2vals = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+    for sample_idx, data_source in enumerate(data_sources):
+        prompt = sample_inputs[sample_idx]
+        var2vals = data_src2prompt2var2vals[data_source][prompt]
+        for var_name, var_vals in infos_dict.items():
+            var2vals[var_name].append(var_vals[sample_idx])
+
+    # Calculate metrics for each group
+    data_src2prompt2var2metric = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
+    for data_source, prompt2var2vals in data_src2prompt2var2vals.items():
+        for prompt, var2vals in prompt2var2vals.items():
+            preds = var2vals.get("pred", None)
+            for var_name, var_vals in var2vals.items():
+                if isinstance(var_vals[0], str):
+                    continue
+
+                metric = {}
+                n_resps = len(var_vals)
+                metric[f"mean@{n_resps}"] = np.mean(var_vals)
+
+                if n_resps > 1:
+                    metric[f"std@{n_resps}"] = np.std(var_vals)
+
+                    ns = []
+                    n = 2
+                    while n < n_resps:
+                        ns.append(n)
+                        n *= 2
+                    ns.append(n_resps)
+
+                    # The (value, prediction) pairs and the voting reducer do not depend on n,
+                    # so they are built once per variable instead of once per subset size.
+                    vote_data = None if preds is None else [{"val": val, "pred": pred} for val, pred in zip(var_vals, preds)]
+                    maj_fn = partial(calc_maj_val, vote_key="pred", val_key="val")
+
+                    for n in ns:
+                        [(bon_mean, bon_std), (won_mean, won_std)] = bootstrap_metric(data=var_vals, subset_size=n, reduce_fns=[np.max, np.min], seed=seed)
+                        metric[f"best@{n}/mean"], metric[f"best@{n}/std"] = bon_mean, bon_std
+                        metric[f"worst@{n}/mean"], metric[f"worst@{n}/std"] = won_mean, won_std
+                        if vote_data is not None:
+                            [(maj_n_mean, maj_n_std)] = bootstrap_metric(data=vote_data, subset_size=n, reduce_fns=[maj_fn], seed=seed)
+                            metric[f"maj@{n}/mean"], metric[f"maj@{n}/std"] = maj_n_mean, maj_n_std
+
+                data_src2prompt2var2metric[data_source][prompt][var_name] = metric
+
+    # Aggregate metrics across prompts
+    data_src2var2metric2prompt_vals = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+    for data_source, prompt2var2metric in data_src2prompt2var2metric.items():
+        for prompt, var2metric in prompt2var2metric.items():
+            for var_name, metric in var2metric.items():
+                for metric_name, metric_val in metric.items():
+                    data_src2var2metric2prompt_vals[data_source][var_name][metric_name].append(metric_val)
+
+    data_src2var2metric2val = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+    for data_source, var2metric2prompt_vals in data_src2var2metric2prompt_vals.items():
+        for var_name, metric2prompt_vals in var2metric2prompt_vals.items():
+            for metric_name, prompt_vals in metric2prompt_vals.items():
+                data_src2var2metric2val[data_source][var_name][metric_name] = np.mean(prompt_vals)
+
+    return data_src2var2metric2val
diff --git a/recipe/moe/moe_trainer/ppo/ray_trainer.py b/recipe/moe/moe_trainer/ppo/ray_trainer.py
new file mode 100644
index 00000000000..f71757faa16
--- /dev/null
+++ b/recipe/moe/moe_trainer/ppo/ray_trainer.py
@@ -0,0 +1,1216 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+# Copyright 2023-2024 SGLang Team
+# Copyright 2025 ModelBest Inc. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+FSDP PPO Trainer with Ray-based single controller.
+This trainer supports model-agnostic model initialization with huggingface
+"""
+
+import json
+import os
+import uuid
+from collections import defaultdict
+from contextlib import contextmanager
+from copy import deepcopy
+from dataclasses import dataclass, field
+from enum import Enum
+from pprint import pprint
+from typing import Dict, Optional, Type
+
+import numpy as np
+import ray
+import torch
+from codetiming import Timer
+from omegaconf import OmegaConf, open_dict
+from torch.utils.data import Dataset, Sampler
+from torchdata.stateful_dataloader import StatefulDataLoader
+from tqdm import tqdm
+
+from verl import DataProto
+from verl.protocol import pad_dataproto_to_divisor, unpad_dataproto
+from verl.single_controller.base import Worker
+from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup
+from verl.single_controller.ray.base import create_colocated_worker_cls
+from verl.trainer.ppo import core_algos
+from verl.trainer.ppo.core_algos import agg_loss
+from verl.trainer.ppo.metric_utils import (
+    compute_data_metrics,
+    compute_throughout_metrics,
+    compute_timing_metrics,
+    process_validation_metrics,
+)
+from verl.trainer.ppo.reward import compute_reward, compute_reward_async
+from verl.utils.checkpoint.checkpoint_manager import BaseCheckpointManager, find_latest_ckpt_path
+from verl.utils.metric import (
+    reduce_metrics,
+)
+from verl.utils.seqlen_balancing import get_seqlen_balanced_partitions, log_seqlen_unbalance
+from verl.utils.torch_functional import masked_mean
+from verl.utils.tracking import ValidationGenerationsLogger
+
+
+WorkerType = Type[Worker]  # a Worker subclass itself (the class object), not an instance
+
+
+class Role(Enum):
+    """
+    Roles that workers are registered under. NOTE(review): an Enum that already defines members cannot be subclassed in Python; add new roles to this class instead.
+    """
+
+    Actor = 0
+    Rollout = 1
+    ActorRollout = 2  # must be a key of role_worker_mapping when the hybrid engine is used (asserted in RayPPOTrainer.__init__)
+    Critic = 3
+    RefPolicy = 4  # its presence in role_worker_mapping sets RayPPOTrainer.use_reference_policy
+    RewardModel = 5  # its presence in role_worker_mapping sets RayPPOTrainer.use_rm
+    ActorRolloutRef = 6
+
+
+class AdvantageEstimator(str, Enum):
+    """
+    Names of the available advantage estimators, kept in an enumeration to avoid spelling errors in adv_estimator. The str mixin makes each member compare equal to its plain string value.
+    """
+
+    GAE = "gae"  # the only estimator for which RayPPOTrainer.__init__ enables the critic
+    GRPO = "grpo"
+    REINFORCE_PLUS_PLUS = "reinforce_plus_plus"
+    REINFORCE_PLUS_PLUS_BASELINE = "reinforce_plus_plus_baseline"
+    REMAX = "remax"
+    RLOO = "rloo"
+    OPO = "opo"
+    GRPO_PASSK = "grpo_passk"
+
+
+@dataclass
+class ResourcePoolManager:
+    """
+    Define a resource pool specification. Resource pool will be initialized first.
+    """
+
+    resource_pool_spec: dict[str, list[int]]  # pool name -> number of GPUs requested on each node of the pool
+    mapping: dict[Role, str]  # role -> name of the resource pool serving it
+    resource_pool_dict: dict[str, RayResourcePool] = field(default_factory=dict)  # filled by create_resource_pool()
+
+    def create_resource_pool(self):
+        """Create a RayResourcePool for every entry of the spec, then check the cluster can satisfy them."""
+        for resource_pool_name, process_on_nodes in self.resource_pool_spec.items():
+            # max_colocate_count means the number of WorkerGroups (i.e. processes) in each RayResourcePool.
+            # For FSDP backend, max_colocate_count=1 is recommended: it merges all WorkerGroups into one.
+            # For Megatron backend, max_colocate_count>1 is recommended: different models can use different WorkerGroups.
+            self.resource_pool_dict[resource_pool_name] = RayResourcePool(process_on_nodes=process_on_nodes, use_gpu=True, max_colocate_count=1, name_prefix=resource_pool_name)
+
+        self._check_resource_available()
+
+    def get_resource_pool(self, role: Role) -> RayResourcePool:
+        """Get the resource pool that ``role`` is mapped to."""
+        return self.resource_pool_dict[self.mapping[role]]
+
+    def get_n_gpus(self) -> int:
+        """Get the number of gpus requested by all resource pools together."""
+        return sum(n_gpus for process_on_nodes in self.resource_pool_spec.values() for n_gpus in process_on_nodes)
+
+    def _check_resource_available(self):
+        """Check if the resource pools can be satisfied in this ray cluster; raise ValueError if not."""
+        node_available_resources = ray.state.available_resources_per_node()
+        node_available_gpus = {node: node_info["GPU"] if "GPU" in node_info else node_info.get("NPU", 0) for node, node_info in node_available_resources.items()}
+
+        # check total required gpus can be satisfied
+        total_available_gpus = sum(node_available_gpus.values())
+        total_required_gpus = self.get_n_gpus()
+        if total_available_gpus < total_required_gpus:
+            raise ValueError(f"Total available GPUs {total_available_gpus} is less than total desired GPUs {total_required_gpus}")
+
+        # check each resource pool can be satisfied, O(#resource_pools * #nodes)
+        for resource_pool_name, process_on_nodes in self.resource_pool_spec.items():
+            num_gpus, num_nodes = process_on_nodes[0], len(process_on_nodes)  # NOTE: only the first entry is used as the per-node GPU count
+            remaining_nodes = num_nodes
+            for node, available_gpus in node_available_gpus.items():
+                if available_gpus >= num_gpus:
+                    node_available_gpus[node] -= num_gpus
+                    remaining_nodes -= 1
+                    if remaining_nodes == 0:
+                        break
+            if remaining_nodes > 0:
+                raise ValueError(f"Resource pool {resource_pool_name}: {num_gpus}*{num_nodes} cannot be satisfied in this ray cluster")
+
+
+def apply_kl_penalty(data: DataProto, kl_ctrl: core_algos.AdaptiveKLController, kl_penalty="kl", multi_turn=False):
+    """Subtract a KL penalty from the token-level scores and store the result as rewards.
+
+    The per-token penalty is ``core_algos.kl_penalty(old_log_probs, ref_log_prob)``, masked to the
+    response tokens and scaled by the controller's current coefficient.
+
+    Args:
+        data (DataProto): Batch providing "responses", "token_level_scores", "old_log_probs", "ref_log_prob"
+            and either "attention_mask" or, when ``multi_turn`` is set, "loss_mask".
+        kl_ctrl (core_algos.AdaptiveKLController): Supplies the coefficient (``value``) and is updated here.
+        kl_penalty (str, optional): Penalty type forwarded to ``core_algos.kl_penalty``. Defaults to "kl".
+        multi_turn (bool, optional): Take the response mask from "loss_mask" instead of "attention_mask".
+
+    Returns:
+        tuple: ``(data, metrics)`` where ``data`` is the same object with "token_level_rewards" written into
+            its batch and ``metrics`` holds "actor/reward_kl_penalty" and "actor/reward_kl_penalty_coeff".
+    """
+    batch = data.batch
+    n_response_tokens = batch["responses"].size(1)
+    scores = batch["token_level_scores"]
+    n_sequences = batch.batch_size[0]
+
+    # keep only the trailing response-length columns of the full-sequence mask
+    mask_key = "loss_mask" if multi_turn else "attention_mask"
+    response_mask = batch[mask_key][:, -n_response_tokens:]
+
+    # KL between the reference policy and the current policy, (batch_size, response_length).
+    # apply_kl_penalty is used with algorithm.use_kl_in_reward=True, so the reference model has been enabled.
+    per_token_kl = core_algos.kl_penalty(batch["old_log_probs"], batch["ref_log_prob"], kl_penalty=kl_penalty)
+    per_token_kl = per_token_kl * response_mask
+    coeff = kl_ctrl.value
+
+    rewards = scores - coeff * per_token_kl
+
+    # masked mean over each sequence first, then the mean over the batch
+    mean_kl = torch.mean(masked_mean(per_token_kl, mask=response_mask, axis=-1), dim=0).item()
+
+    # according to https://github.com/huggingface/trl/blob/951ca1841f29114b969b57b26c7d3e80a39f75a0/trl/trainer/ppo_trainer.py#L837
+    kl_ctrl.update(current_kl=mean_kl, n_steps=n_sequences)
+    batch["token_level_rewards"] = rewards
+
+    metrics = {
+        "actor/reward_kl_penalty": mean_kl,
+        "actor/reward_kl_penalty_coeff": coeff,
+    }
+
+    return data, metrics
+
+
+def compute_response_mask(data: DataProto):
+    """Slice the response part out of the batch's attention mask.
+
+    The response width is taken from the second dimension of ``data.batch["responses"]``; that many
+    trailing columns of ``data.batch["attention_mask"]`` are returned.
+
+    Args:
+        data (DataProto): Batch holding "responses" and "attention_mask".
+
+    Returns:
+        torch.Tensor: The last ``responses.size(1)`` columns of the attention mask.
+    """
+    batch = data.batch
+    width = batch["responses"].size(1)
+    # negative start index: count the response columns from the right edge
+    return batch["attention_mask"][:, -width:]
+
+
+def compute_advantage(data: DataProto, adv_estimator, gamma=1.0, lam=1.0, num_repeat=1, multi_turn=False, norm_adv_by_std_in_grpo=True, **kwargs):
+    """Compute advantage estimates for policy optimization.
+
+    Args:
+        data (DataProto): The data containing batched model outputs and inputs.
+        adv_estimator: The advantage estimator to use (a member of AdvantageEstimator, e.g. GAE or GRPO).
+        gamma (float, optional): Discount factor for future rewards. Defaults to 1.0.
+        lam (float, optional): Lambda parameter for GAE. Defaults to 1.0.
+        num_repeat (int, optional): Not used by this function. Defaults to 1.
+        multi_turn (bool, optional): For GRPO, use the response part of "loss_mask" as the mask. Defaults to False.
+        norm_adv_by_std_in_grpo (bool, optional): Whether to normalize advantages by standard deviation in GRPO. Defaults to True.
+        **kwargs: GAE only: "use_pf_ppo", "pf_ppo_reweight_method" (default "pow") and "pf_ppo_weight_pow" (default 2.0).
+
+    Returns:
+        DataProto: The data (with PF-PPO, the result of the reweighting call) with "advantages" and "returns" in its batch.
+
+    Raises:
+        NotImplementedError: If adv_estimator is not one of the estimators handled below.
+    """
+    # Back-compatible with trainers that do not compute response mask in fit
+    if "response_mask" not in data.batch:
+        data.batch["response_mask"] = compute_response_mask(data)
+    # prepare response group
+    # TODO: add other ways to estimate advantages
+    if adv_estimator == AdvantageEstimator.GAE:
+        advantages, returns = core_algos.compute_gae_advantage_return(
+            token_level_rewards=data.batch["token_level_rewards"],
+            values=data.batch["values"],
+            response_mask=data.batch["response_mask"],
+            gamma=gamma,
+            lam=lam,
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+        if kwargs.get("use_pf_ppo", False):
+            data = core_algos.compute_pf_ppo_reweight_data(
+                data,
+                kwargs.get("pf_ppo_reweight_method", "pow"),
+                kwargs.get("pf_ppo_weight_pow", 2.0),
+            )
+    elif adv_estimator == AdvantageEstimator.GRPO:
+        # TODO: test on more adv estimator type
+        grpo_calculation_mask = data.batch["response_mask"]
+        if multi_turn:
+            # If multi-turn, replace the mask with the relevant part of loss_mask
+            response_length = grpo_calculation_mask.size(1)  # Get length from the initial response mask
+            grpo_calculation_mask = data.batch["loss_mask"][:, -response_length:]  # This mask is the one intended for GRPO
+        # Call compute_grpo_outcome_advantage with parameters matching its definition
+        advantages, returns = core_algos.compute_grpo_outcome_advantage(
+            token_level_rewards=data.batch["token_level_rewards"],
+            response_mask=grpo_calculation_mask,
+            index=data.non_tensor_batch["uid"],
+            norm_adv_by_std_in_grpo=norm_adv_by_std_in_grpo,
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+    elif adv_estimator == AdvantageEstimator.GRPO_PASSK:
+        advantages, returns = core_algos.compute_grpo_passk_outcome_advantage(
+            token_level_rewards=data.batch["token_level_rewards"],
+            response_mask=data.batch["response_mask"],
+            index=data.non_tensor_batch["uid"],
+            norm_adv_by_std_in_grpo=norm_adv_by_std_in_grpo,
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+    elif adv_estimator == AdvantageEstimator.REINFORCE_PLUS_PLUS_BASELINE:
+        advantages, returns = core_algos.compute_reinforce_plus_plus_baseline_outcome_advantage(
+            token_level_rewards=data.batch["token_level_rewards"],
+            response_mask=data.batch["response_mask"],
+            index=data.non_tensor_batch["uid"],
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+    elif adv_estimator == AdvantageEstimator.REINFORCE_PLUS_PLUS:
+        advantages, returns = core_algos.compute_reinforce_plus_plus_outcome_advantage(
+            token_level_rewards=data.batch["token_level_rewards"],
+            response_mask=data.batch["response_mask"],
+            gamma=gamma,
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+    elif adv_estimator == AdvantageEstimator.REMAX:
+        advantages, returns = core_algos.compute_remax_outcome_advantage(
+            token_level_rewards=data.batch["token_level_rewards"],
+            reward_baselines=data.batch["reward_baselines"],
+            response_mask=data.batch["response_mask"],
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+    elif adv_estimator == AdvantageEstimator.RLOO:
+        advantages, returns = core_algos.compute_rloo_outcome_advantage(
+            token_level_rewards=data.batch["token_level_rewards"],
+            response_mask=data.batch["response_mask"],
+            index=data.non_tensor_batch["uid"],
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+    elif adv_estimator == AdvantageEstimator.OPO:
+        advantages, returns = core_algos.compute_opo_outcome_advantage(
+            token_level_rewards=data.batch["token_level_rewards"],
+            response_mask=data.batch["response_mask"],
+            index=data.non_tensor_batch["uid"],
+        )
+        data.batch["advantages"] = advantages
+        data.batch["returns"] = returns
+    else:
+        raise NotImplementedError(f"Unsupported advantage estimator: {adv_estimator}")
+    return data
+
+
+@contextmanager
+def _timer(name: str, timing_raw: Dict[str, float]):
+    """Time the enclosed block and add the elapsed time to ``timing_raw[name]``.
+
+    The measurement comes from a ``codetiming.Timer`` created with ``logger=None``; its ``last``
+    reading is accumulated, so using the same name several times sums the readings.
+
+    Args:
+        name (str): Key under which the elapsed time is accumulated.
+        timing_raw (Dict[str, float]): Mapping that is updated in place.
+
+    Yields:
+        None: Control is handed to the body of the ``with`` statement.
+    """
+    with Timer(name=name, logger=None) as stopwatch:
+        yield
+    # an exception raised inside the timed block skips this accumulation (unless Timer suppresses it)
+    elapsed = stopwatch.last
+    timing_raw[name] = timing_raw.get(name, 0) + elapsed
+
+
+class RayPPOTrainer:
+    """
+    Note that this trainer runs on the driver process on a single CPU/GPU node.
+    """
+
+    # TODO: support each role have individual ray_worker_group_cls,
+    # i.e., support different backend of different role
+    def __init__(
+        self,
+        config,
+        tokenizer,
+        role_worker_mapping: dict[Role, WorkerType],
+        resource_pool_manager: ResourcePoolManager,
+        ray_worker_group_cls: Type[RayWorkerGroup] = RayWorkerGroup,
+        processor=None,
+        reward_fn=None,
+        val_reward_fn=None,
+        train_dataset: Optional[Dataset] = None,
+        val_dataset: Optional[Dataset] = None,
+        collate_fn=None,
+        train_sampler: Optional[Sampler] = None,
+        device_name="cuda",
+    ):
+        """Initialize distributed PPO trainer with Ray backend.
+
+        Raises NotImplementedError when ``config.algorithm.adv_estimator`` is not a known AdvantageEstimator.
+        """
+        self.tokenizer = tokenizer
+        self.processor = processor
+        self.config = config
+        self.reward_fn = reward_fn
+        self.val_reward_fn = val_reward_fn
+
+        self.hybrid_engine = config.actor_rollout_ref.hybrid_engine
+        assert self.hybrid_engine, "Currently, only support hybrid engine"
+        if self.hybrid_engine:
+            assert Role.ActorRollout in role_worker_mapping, f"{role_worker_mapping.keys()=}"
+
+        self.role_worker_mapping = role_worker_mapping
+        self.resource_pool_manager = resource_pool_manager
+        self.use_reference_policy = Role.RefPolicy in role_worker_mapping
+        self.use_rm = Role.RewardModel in role_worker_mapping
+        self.ray_worker_group_cls = ray_worker_group_cls
+        self.device_name = device_name
+        self.validation_generations_logger = ValidationGenerationsLogger()
+
+        # if ref_in_actor is True, the reference policy will be actor without lora applied
+        self.ref_in_actor = config.actor_rollout_ref.model.get("lora_rank", 0) > 0
+
+        # define in-reward KL control
+        # kl loss control currently not supported
+        if config.algorithm.use_kl_in_reward:
+            self.kl_ctrl_in_reward = core_algos.get_kl_controller(config.algorithm.kl_ctrl)
+
+        if self.config.algorithm.adv_estimator == AdvantageEstimator.GAE:
+            self.use_critic = True
+        elif self.config.algorithm.adv_estimator in [
+            AdvantageEstimator.GRPO,
+            AdvantageEstimator.GRPO_PASSK,
+            AdvantageEstimator.REINFORCE_PLUS_PLUS,
+            AdvantageEstimator.REMAX,
+            AdvantageEstimator.RLOO,
+            AdvantageEstimator.OPO,
+            AdvantageEstimator.REINFORCE_PLUS_PLUS_BASELINE,
+        ]:
+            self.use_critic = False
+        else:
+            raise NotImplementedError(f"Unsupported advantage estimator: {self.config.algorithm.adv_estimator}")
+        self._validate_config()
+        self._create_dataloader(train_dataset, val_dataset, collate_fn, train_sampler)
+
+    def _validate_config(self):
+        config = self.config
+        # number of GPUs total
+        n_gpus = config.trainer.n_gpus_per_node * config.trainer.nnodes
+        if config.actor_rollout_ref.actor.strategy == "megatron":
+            model_parallel_size = config.actor_rollout_ref.actor.megatron.tensor_model_parallel_size * config.actor_rollout_ref.actor.megatron.pipeline_model_parallel_size
+            assert n_gpus % (model_parallel_size * config.actor_rollout_ref.actor.megatron.context_parallel_size) == 0, f"n_gpus ({n_gpus}) must be divisible by model_parallel_size ({model_parallel_size}) times context_parallel_size ({config.actor_rollout_ref.actor.megatron.context_parallel_size})"
+            megatron_dp = n_gpus // (model_parallel_size * config.actor_rollout_ref.actor.megatron.context_parallel_size)
+            minimal_bsz = megatron_dp * config.actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu
+        else:
+            minimal_bsz = n_gpus
+
+        # 1. Check total batch size for data correctness
+        real_train_batch_size = config.data.train_batch_size * config.actor_rollout_ref.rollout.n
+        assert real_train_batch_size % minimal_bsz == 0, f"real_train_batch_size ({real_train_batch_size}) must be divisible by minimal possible batch size ({minimal_bsz})"
+
+        # A helper function to check "micro_batch_size" vs "micro_batch_size_per_gpu"
+        # We throw an error if the user sets both. The new convention is "..._micro_batch_size_per_gpu".
+        def check_mutually_exclusive(mbs, mbs_per_gpu, name: str):
+            """Raise ValueError unless exactly one of the two micro-batch-size settings of ``name`` is set."""
+            settings = {
+                "actor_rollout_ref.actor": "micro_batch_size",
+                "critic": "micro_batch_size",
+                "reward_model": "micro_batch_size",
+                "actor_rollout_ref.ref": "log_prob_micro_batch_size",
+                "actor_rollout_ref.rollout": "log_prob_micro_batch_size",
+            }
+            if name not in settings:
+                return  # sections that are not listed above are not checked
+
+            param = settings[name]
+            param_per_gpu = f"{param}_per_gpu"
+            if mbs is None and mbs_per_gpu is None:
+                raise ValueError(f"[{name}] Please set at least one of '{name}.{param}' or '{name}.{param_per_gpu}'.")
+            if mbs is not None and mbs_per_gpu is not None:
+                raise ValueError(f"[{name}] You have set both '{name}.{param}' AND '{name}.{param_per_gpu}'. Please remove '{name}.{param}' because only '*_{param_per_gpu}' is supported (the former is deprecated).")
+
+        if not config.actor_rollout_ref.actor.use_dynamic_bsz:
+            # actor: ppo_micro_batch_size vs. ppo_micro_batch_size_per_gpu
+            check_mutually_exclusive(
+                config.actor_rollout_ref.actor.ppo_micro_batch_size,
+                config.actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu,
+                "actor_rollout_ref.actor",
+            )
+
+            if self.use_reference_policy:
+                # reference: log_prob_micro_batch_size vs. log_prob_micro_batch_size_per_gpu
+                check_mutually_exclusive(
+                    config.actor_rollout_ref.ref.log_prob_micro_batch_size,
+                    config.actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu,
+                    "actor_rollout_ref.ref",
+                )
+
+            #  The rollout section also has log_prob_micro_batch_size vs. log_prob_micro_batch_size_per_gpu
+            check_mutually_exclusive(
+                config.actor_rollout_ref.rollout.log_prob_micro_batch_size,
+                config.actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu,
+                "actor_rollout_ref.rollout",
+            )
+
+        if self.use_critic and not config.critic.use_dynamic_bsz:
+            # Check for critic micro-batch size conflicts
+            check_mutually_exclusive(config.critic.ppo_micro_batch_size, config.critic.ppo_micro_batch_size_per_gpu, "critic")
+
+        # Check for reward model micro-batch size conflicts
+        if config.reward_model.enable and not config.reward_model.use_dynamic_bsz:
+            check_mutually_exclusive(config.reward_model.micro_batch_size, config.reward_model.micro_batch_size_per_gpu, "reward_model")
+
+        # Actor
+        # check if train_batch_size is larger than ppo_mini_batch_size
+        # if NOT dynamic_bsz, we must ensure:
+        #    ppo_mini_batch_size is divisible by ppo_micro_batch_size
+        #    ppo_micro_batch_size * sequence_parallel_size >= n_gpus
+        if not config.actor_rollout_ref.actor.use_dynamic_bsz:
+            assert config.data.train_batch_size >= config.actor_rollout_ref.actor.ppo_mini_batch_size
+            sp_size = config.actor_rollout_ref.actor.get("ulysses_sequence_parallel_size", 1)
+            if config.actor_rollout_ref.actor.ppo_micro_batch_size is not None:
+                assert config.actor_rollout_ref.actor.ppo_mini_batch_size % config.actor_rollout_ref.actor.ppo_micro_batch_size == 0
+                assert config.actor_rollout_ref.actor.ppo_micro_batch_size * sp_size >= n_gpus
+
+        assert config.actor_rollout_ref.actor.loss_agg_mode in [
+            "token-mean",
+            "seq-mean-token-sum",
+            "seq-mean-token-mean",
+            "seq-mean-token-sum-norm",
+        ], f"Invalid loss_agg_mode: {config.actor_rollout_ref.actor.loss_agg_mode}"
+
+        if config.algorithm.use_kl_in_reward and config.actor_rollout_ref.actor.use_kl_loss:
+            print("NOTICE: You have both enabled in-reward kl and kl loss.")
+
+        # critic
+        if self.use_critic and not config.critic.use_dynamic_bsz:
+            assert config.data.train_batch_size >= config.critic.ppo_mini_batch_size
+            sp_size = config.critic.get("ulysses_sequence_parallel_size", 1)
+            if config.critic.ppo_micro_batch_size is not None:
+                assert config.critic.ppo_mini_batch_size % config.critic.ppo_micro_batch_size == 0
+                assert config.critic.ppo_micro_batch_size * sp_size >= n_gpus
+
+        # Check if use_remove_padding is enabled when using sequence parallelism for fsdp
+        if config.actor_rollout_ref.actor.strategy == "fsdp" and (config.actor_rollout_ref.actor.get("ulysses_sequence_parallel_size", 1) > 1 or config.actor_rollout_ref.ref.get("ulysses_sequence_parallel_size", 1) > 1):
+            assert config.actor_rollout_ref.model.use_remove_padding, "When using sequence parallelism for actor/ref policy, you must enable `use_remove_padding`."
+
+        if self.use_critic and config.critic.strategy == "fsdp":
+            if config.critic.get("ulysses_sequence_parallel_size", 1) > 1:
+                assert config.critic.model.use_remove_padding, "When using sequence parallelism for critic, you must enable `use_remove_padding`."
+
+        if config.data.get("val_batch_size", None) is not None:
+            print("WARNING: val_batch_size is deprecated." + " Validation datasets are sent to inference engines as a whole batch," + " which will schedule the memory themselves.")
+
+        # check eval config
+        if config.actor_rollout_ref.rollout.val_kwargs.do_sample:
+            assert config.actor_rollout_ref.rollout.temperature > 0, "validation gen temperature should be greater than 0 when enabling do_sample"
+
+        # check multi_turn with tool config
+        if config.actor_rollout_ref.rollout.multi_turn.enable:
+            assert config.actor_rollout_ref.rollout.multi_turn.tool_config_path is not None, "tool_config_path must be set when enabling multi_turn with tool, due to no role-playing support"
+            assert config.algorithm.adv_estimator in [AdvantageEstimator.GRPO], "only GRPO is tested for multi-turn with tool"
+
+        print("[validate_config] All configuration checks passed successfully!")
+
+    def _create_dataloader(self, train_dataset, val_dataset, collate_fn, train_sampler):
+        """Build the train/validation dataloaders and derive the total step count.
+
+        Any argument passed as ``None`` is replaced by a default built from
+        ``self.config.data``. Sets ``self.train_dataset``, ``self.val_dataset``,
+        ``self.train_dataloader``, ``self.val_dataloader`` and
+        ``self.total_training_steps``, then tries to copy the step count into
+        the actor and critic optimizer configs.
+
+        Args:
+            train_dataset: Training dataset, or ``None`` to build one from
+                ``data.train_files``.
+            val_dataset: Validation dataset, or ``None`` to build one from
+                ``data.val_files``.
+            collate_fn: Batch collation callable, or ``None`` to use the
+                default ``collate_fn`` from ``verl.utils.dataset.rl_dataset``.
+            train_sampler: Sampler for the training set, or ``None`` to create
+                one from the data config.
+
+        Raises:
+            AssertionError: If either dataloader contains no batches.
+        """
+        # TODO: we have to make sure the batch size is divisible by the dp size
+        from verl.trainer.main_ppo import create_rl_dataset, create_rl_sampler
+
+        data_cfg = self.config.data
+
+        if train_dataset is None:
+            train_dataset = create_rl_dataset(data_cfg.train_files, data_cfg, self.tokenizer, self.processor)
+        if val_dataset is None:
+            val_dataset = create_rl_dataset(data_cfg.val_files, data_cfg, self.tokenizer, self.processor)
+        self.train_dataset, self.val_dataset = train_dataset, val_dataset
+
+        if train_sampler is None:
+            train_sampler = create_rl_sampler(data_cfg, self.train_dataset)
+        if collate_fn is None:
+            from verl.utils.dataset.rl_dataset import collate_fn as default_collate_fn
+
+            collate_fn = default_collate_fn
+
+        self.train_dataloader = StatefulDataLoader(
+            dataset=self.train_dataset,
+            # `gen_batch_size` takes precedence over `train_batch_size` when present.
+            batch_size=data_cfg.get("gen_batch_size", data_cfg.train_batch_size),
+            num_workers=data_cfg.get("dataloader_num_workers", 8),
+            drop_last=True,
+            collate_fn=collate_fn,
+            sampler=train_sampler,
+        )
+
+        # `val_batch_size` is deprecated and may be missing from the config, so it
+        # is read with `.get` instead of attribute access. Without a value the
+        # whole validation set is served as a single batch.
+        val_batch_size = data_cfg.get("val_batch_size", None)
+        if val_batch_size is None:
+            val_batch_size = len(self.val_dataset)
+
+        self.val_dataloader = StatefulDataLoader(
+            dataset=self.val_dataset,
+            batch_size=val_batch_size,
+            num_workers=data_cfg.get("dataloader_num_workers", 8),
+            shuffle=False,
+            drop_last=False,
+            collate_fn=collate_fn,
+        )
+
+        assert len(self.train_dataloader) >= 1, "Train dataloader is empty!"
+        assert len(self.val_dataloader) >= 1, "Validation dataloader is empty!"
+
+        print(f"Size of train dataloader: {len(self.train_dataloader)}, Size of val dataloader: {len(self.val_dataloader)}")
+
+        total_training_steps = len(self.train_dataloader) * self.config.trainer.total_epochs
+
+        # An explicit step budget in the config overrides the epoch-derived one.
+        if self.config.trainer.total_training_steps is not None:
+            total_training_steps = self.config.trainer.total_training_steps
+
+        self.total_training_steps = total_training_steps
+        print(f"Total training steps: {self.total_training_steps}")
+
+        # Best effort: a failure to write the step count into the optimizer
+        # configs is reported but does not abort dataloader creation.
+        try:
+            OmegaConf.set_struct(self.config, True)
+            with open_dict(self.config):
+                if OmegaConf.select(self.config, "actor_rollout_ref.actor.optim"):
+                    self.config.actor_rollout_ref.actor.optim.total_training_steps = total_training_steps
+                if OmegaConf.select(self.config, "critic.optim"):
+                    self.config.critic.optim.total_training_steps = total_training_steps
+        except Exception as e:
+            print(f"Warning: Could not set total_training_steps in config. Structure missing? Error: {e}")
+
+    def _dump_generations(self, inputs, outputs, scores, reward_extra_infos_dict, dump_path):
+        """Dump rollout/validation samples as JSONL.
+
+        Writes ``<dump_path>/<global_steps>.jsonl`` (creating ``dump_path`` if
+        needed) with one JSON object per sample. Each object holds ``input``,
+        ``output``, ``score`` and ``step``, plus every entry of
+        ``reward_extra_infos_dict`` whose value list has exactly one item per
+        sample; entries of any other length are skipped.
+
+        Args:
+            inputs: Per-sample inputs; its length defines the sample count.
+            outputs: Per-sample outputs, indexed in parallel with ``inputs``.
+            scores: Per-sample scores, indexed in parallel with ``inputs``.
+            reward_extra_infos_dict: Mapping of extra column name to a list of
+                per-sample values.
+            dump_path: Directory that receives the JSONL file.
+        """
+        os.makedirs(dump_path, exist_ok=True)
+        filename = os.path.join(dump_path, f"{self.global_steps}.jsonl")
+
+        n = len(inputs)
+        base_data = {
+            "input": inputs,
+            "output": outputs,
+            "score": scores,
+            "step": [self.global_steps] * n,
+        }
+
+        # Only columns aligned one-to-one with the samples can be emitted per row.
+        for k, v in reward_extra_infos_dict.items():
+            if len(v) == n:
+                base_data[k] = v
+
+        # `ensure_ascii=False` writes non-ASCII characters verbatim, so the file is
+        # opened as UTF-8 explicitly instead of relying on the platform default
+        # encoding, which may be unable to encode them.
+        with open(filename, "w", encoding="utf-8") as f:
+            for i in range(n):
+                entry = {k: v[i] for k, v in base_data.items()}
+                f.write(json.dumps(entry, ensure_ascii=False) + "\n")
+
+        print(f"Dumped generations to {filename}")
+
+    def _maybe_log_val_generations(self, inputs, outputs, scores):
+        """Forward a fixed-seed selection of validation samples to the generations logger.
+
+        Does nothing when ``trainer.log_val_generations`` is 0. Otherwise the
+        (input, output, score) triples are ordered by input text, shuffled with
+        a fixed seed, cut down to the configured count and passed to
+        ``self.validation_generations_logger`` together with the configured
+        logger backends and the current global step.
+        """
+        max_samples = self.config.trainer.log_val_generations
+        if max_samples == 0:
+            return
+
+        import numpy as np
+
+        # Order the triples by their input text before shuffling.
+        triples = sorted(zip(inputs, outputs, scores), key=lambda triple: triple[0])
+
+        # Seeded generator, so the shuffle is deterministic.
+        np.random.RandomState(42).shuffle(triples)
+
+        # Keep the leading N entries of the shuffled list.
+        selected = triples[:max_samples]
+
+        self.validation_generations_logger.log(self.config.trainer.logger, selected, self.global_steps)
+
+    def _validate(self):
+        """Generate on the validation set, score the responses and build metrics.
+
+        For every validation batch the prompts are repeated ``val_kwargs.n``
+        times, responses are generated (sync or async rollout), and
+        ``self.val_reward_fn`` scores them. Decoded inputs/outputs, scores and
+        extra reward info are accumulated over all batches, optionally logged
+        and dumped, and finally turned into per-data-source metrics.
+
+        Returns:
+            dict: ``"val-core/..."`` / ``"val-aux/..."`` metric names mapped to
+            values. An empty dict is returned as soon as a batch is found whose
+            reward style is ``"model"`` while the reward model is enabled.
+        """
+        rollout_cfg = self.config.actor_rollout_ref.rollout
+        data_sources_per_batch = []
+        extra_infos: dict[str, list] = defaultdict(list)
+
+        # Accumulators for the sample table / JSONL dump
+        all_inputs = []
+        all_outputs = []
+        sample_scores = []
+
+        # Non-tensor entries that are moved to the generation batch only if present
+        optional_gen_keys = ("multi_modal_data", "raw_prompt", "tools_kwargs")
+
+        for raw_batch in self.val_dataloader:
+            val_batch = DataProto.from_single_dict(raw_batch)
+
+            # one copy of each prompt per requested validation sample
+            val_batch = val_batch.repeat(repeat_times=rollout_cfg.val_kwargs.n, interleave=True)
+
+            # we only do validation on rule-based rm
+            if self.config.reward_model.enable and val_batch[0].non_tensor_batch["reward_model"]["style"] == "model":
+                return {}
+
+            # Keep the decoded prompts before input_ids are popped below
+            # TODO: Can we keep special tokens except for padding tokens?
+            prompt_ids = val_batch.batch["input_ids"]
+            all_inputs.extend([self.tokenizer.decode(ids, skip_special_tokens=True) for ids in prompt_ids])
+
+            gen_inputs = val_batch.pop(
+                batch_keys=["input_ids", "attention_mask", "position_ids"],
+                non_tensor_batch_keys=["raw_prompt_ids"] + [key for key in optional_gen_keys if key in val_batch.non_tensor_batch],
+            )
+
+            gen_inputs.meta_info = {
+                "eos_token_id": self.tokenizer.eos_token_id,
+                "pad_token_id": self.tokenizer.pad_token_id,
+                "recompute_log_prob": False,
+                "do_sample": rollout_cfg.val_kwargs.do_sample,
+                "validate": True,
+            }
+            print(f"test_gen_batch meta info: {gen_inputs.meta_info}")
+
+            # pad to be divisible by dp_size
+            padded_inputs, pad_size = pad_dataproto_to_divisor(gen_inputs, self.actor_rollout_wg.world_size)
+            if self.async_rollout_mode:
+                self.async_rollout_manager.wake_up()
+                padded_outputs = self.async_rollout_manager.generate_sequences(padded_inputs)
+                self.async_rollout_manager.sleep()
+            else:
+                padded_outputs = self.actor_rollout_wg.generate_sequences(padded_inputs)
+
+            # drop the padding rows again
+            gen_outputs = unpad_dataproto(padded_outputs, pad_size=pad_size)
+            print("validation generation end")
+
+            # Keep the decoded responses
+            response_ids = gen_outputs.batch["responses"]
+            all_outputs.extend([self.tokenizer.decode(ids, skip_special_tokens=True) for ids in response_ids])
+
+            val_batch = val_batch.union(gen_outputs)
+
+            # evaluate using reward_function
+            reward_result = self.val_reward_fn(val_batch, return_dict=True)
+            reward_tensor = reward_result["reward_tensor"]
+            batch_scores = reward_tensor.sum(-1).cpu().tolist()
+            sample_scores.extend(batch_scores)
+
+            extra_infos["reward"].extend(batch_scores)
+            if "reward_extra_info" in reward_result:
+                for info_key, info_values in reward_result["reward_extra_info"].items():
+                    extra_infos[info_key].extend(info_values)
+
+            # Fall back to "unknown" for every row when the batch carries no data_source
+            fallback_sources = ["unknown"] * reward_tensor.shape[0]
+            data_sources_per_batch.append(val_batch.non_tensor_batch.get("data_source", fallback_sources))
+
+        self._maybe_log_val_generations(inputs=all_inputs, outputs=all_outputs, scores=sample_scores)
+
+        # dump generations
+        dump_dir = self.config.trainer.get("validation_data_dir", None)
+        if dump_dir:
+            self._dump_generations(
+                inputs=all_inputs,
+                outputs=all_outputs,
+                scores=sample_scores,
+                reward_extra_infos_dict=extra_infos,
+                dump_path=dump_dir,
+            )
+
+        # Every extra-info column must be empty or aligned with the scores
+        for key_info, lst in extra_infos.items():
+            assert len(lst) == 0 or len(lst) == len(sample_scores), f"{key_info}: {len(lst)=}, {len(sample_scores)=}"
+
+        data_sources = np.concatenate(data_sources_per_batch, axis=0)
+        metrics_by_source = process_validation_metrics(data_sources, all_inputs, extra_infos)
+
+        core_prefixes = ("mean", "maj", "best")
+        metric_dict = {}
+        for data_source, var2metric2val in metrics_by_source.items():
+            core_var = "acc" if "acc" in var2metric2val else "reward"
+            for var_name, metric2val in var2metric2val.items():
+                # Largest N among the "...@N/..." metric names of this variable
+                n_max = max(int(name.split("@")[-1].split("/")[0]) for name in metric2val)
+                largest_n_tag = f"@{n_max}"
+                for metric_name, metric_val in metric2val.items():
+                    is_core = var_name == core_var and metric_name.startswith(core_prefixes) and largest_n_tag in metric_name
+                    section = "val-core" if is_core else "val-aux"
+                    metric_dict[f"{section}/{data_source}/{var_name}/{metric_name}"] = metric_val
+
+        return metric_dict
+
+    def init_workers(self):
+        """Set up the Ray worker groups for all enabled roles and load their models.
+
+        Steps:
+        1. Create the resource pools and register one ``RayClassWithInitArgs``
+           per enabled role (actor/rollout, critic, reference policy, reward
+           model) under the pool assigned to that role.
+        2. Spawn one colocated worker group per resource pool.
+        3. Call ``init_model`` on critic, reference policy (unless it lives in
+           the actor), reward model and, last, the actor/rollout group.
+        4. When the rollout mode is ``"async"``, create the
+           ``AsyncLLMServerManager``.
+
+        Raises:
+            NotImplementedError: If ``self.hybrid_engine`` is false.
+        """
+        pool_manager = self.resource_pool_manager
+        pool_manager.create_resource_pool()
+
+        self.resource_pool_to_cls = {pool: {} for pool in pool_manager.resource_pool_dict.values()}
+
+        # actor and rollout: only the hybrid engine is implemented
+        if not self.hybrid_engine:
+            raise NotImplementedError
+        actor_rollout_pool = pool_manager.get_resource_pool(Role.ActorRollout)
+        self.resource_pool_to_cls[actor_rollout_pool]["actor_rollout"] = RayClassWithInitArgs(
+            cls=self.role_worker_mapping[Role.ActorRollout],
+            config=self.config.actor_rollout_ref,
+            role="actor_rollout",
+        )
+
+        # critic
+        if self.use_critic:
+            critic_pool = pool_manager.get_resource_pool(Role.Critic)
+            self.resource_pool_to_cls[critic_pool]["critic"] = RayClassWithInitArgs(cls=self.role_worker_mapping[Role.Critic], config=self.config.critic)
+
+        # reference policy, if needed
+        if self.use_reference_policy:
+            ref_pool = pool_manager.get_resource_pool(Role.RefPolicy)
+            self.resource_pool_to_cls[ref_pool]["ref"] = RayClassWithInitArgs(self.role_worker_mapping[Role.RefPolicy], config=self.config.actor_rollout_ref, role="ref")
+
+        # reward model, created here if reward_fn is None
+        if self.use_rm:
+            rm_pool = pool_manager.get_resource_pool(Role.RewardModel)
+            self.resource_pool_to_cls[rm_pool]["rm"] = RayClassWithInitArgs(self.role_worker_mapping[Role.RewardModel], config=self.config.reward_model)
+
+        # initialize WorkerGroup
+        # NOTE: if you want to use a different resource pool for each role, which can support different parallel size,
+        # you should not use `create_colocated_worker_cls`.
+        # Instead, directly pass different resource pool to different worker groups.
+        # See https://github.com/volcengine/verl/blob/master/examples/ray/tutorial.ipynb for more information.
+        worker_groups = {}
+        extra_wg_kwargs = {}  # optional keyword arguments for RayWorkerGroup
+        if OmegaConf.select(self.config.trainer, "ray_wait_register_center_timeout") is not None:
+            extra_wg_kwargs["ray_wait_register_center_timeout"] = self.config.trainer.ray_wait_register_center_timeout
+
+        for pool, role_classes in self.resource_pool_to_cls.items():
+            colocated_cls = create_colocated_worker_cls(class_dict=role_classes)
+            pool_worker_group = self.ray_worker_group_cls(resource_pool=pool, ray_cls_with_init=colocated_cls, device_name=self.device_name, **extra_wg_kwargs)
+            worker_groups.update(pool_worker_group.spawn(prefix_set=role_classes.keys()))
+
+        if self.use_critic:
+            self.critic_wg = worker_groups["critic"]
+            self.critic_wg.init_model()
+
+        if self.use_reference_policy and not self.ref_in_actor:
+            self.ref_policy_wg = worker_groups["ref"]
+            self.ref_policy_wg.init_model()
+
+        if self.use_rm:
+            self.rm_wg = worker_groups["rm"]
+            self.rm_wg.init_model()
+
+        # we should create rollout at the end so that vllm can have a better estimation of kv cache memory
+        self.actor_rollout_wg = worker_groups["actor_rollout"]
+        self.actor_rollout_wg.init_model()
+
+        # async rollout manager and request scheduler: only for rollout mode "async"
+        self.async_rollout_mode = False
+        if self.config.actor_rollout_ref.rollout.mode != "async":
+            return
+
+        self.async_rollout_mode = True
+        from verl.workers.rollout.async_server import AsyncLLMServerManager
+
+        self.async_rollout_manager = AsyncLLMServerManager(
+            config=self.config.actor_rollout_ref,
+            worker_group=self.actor_rollout_wg,
+        )
+
+    def _save_checkpoint(self):
+        """Save actor (and critic) checkpoints, the dataloader state and the step tracker.
+
+        Local layout: ``<default_local_dir>/global_step_<N>/`` containing
+        ``actor``, optionally ``critic``, and ``data.pt`` (train dataloader
+        state), plus ``<default_local_dir>/latest_checkpointed_iteration.txt``
+        holding ``N``. When ``trainer.default_hdfs_dir`` is set, the matching
+        remote paths are passed to the worker groups; otherwise ``None`` is.
+        """
+        trainer_cfg = self.config.trainer
+        step_dirname = f"global_step_{self.global_steps}"
+
+        def remote_path_for(role_dir):
+            """Return the remote checkpoint path for ``role_dir``, or None without an HDFS dir."""
+            if trainer_cfg.default_hdfs_dir is None:
+                return None
+            return os.path.join(trainer_cfg.default_hdfs_dir, step_dirname, role_dir)
+
+        # path: given_path + `/global_step_{global_steps}` + `/actor`
+        local_global_step_folder = os.path.join(trainer_cfg.default_local_dir, step_dirname)
+        print(f"local_global_step_folder: {local_global_step_folder}")
+
+        actor_local_path = os.path.join(local_global_step_folder, "actor")
+        actor_remote_path = remote_path_for("actor")
+
+        # The deprecated flag forces both retention limits to 1.
+        if trainer_cfg.get("remove_previous_ckpt_in_save", False):
+            print("Warning: remove_previous_ckpt_in_save is deprecated," + " set max_actor_ckpt_to_keep=1 and max_critic_ckpt_to_keep=1 instead")
+            actor_keep_limit = 1
+            critic_keep_limit = 1
+        else:
+            actor_keep_limit = trainer_cfg.get("max_actor_ckpt_to_keep", None)
+            critic_keep_limit = trainer_cfg.get("max_critic_ckpt_to_keep", None)
+
+        self.actor_rollout_wg.save_checkpoint(actor_local_path, actor_remote_path, self.global_steps, max_ckpt_to_keep=actor_keep_limit)
+
+        if self.use_critic:
+            critic_local_path = os.path.join(local_global_step_folder, "critic")
+            critic_remote_path = remote_path_for("critic")
+            self.critic_wg.save_checkpoint(critic_local_path, critic_remote_path, self.global_steps, max_ckpt_to_keep=critic_keep_limit)
+
+        # save dataloader
+        BaseCheckpointManager.local_mkdir(local_global_step_folder)
+        torch.save(self.train_dataloader.state_dict(), os.path.join(local_global_step_folder, "data.pt"))
+
+        # latest checkpointed iteration tracker (for atomic usage)
+        tracker_path = os.path.join(trainer_cfg.default_local_dir, "latest_checkpointed_iteration.txt")
+        with open(tracker_path, "w") as tracker_file:
+            tracker_file.write(str(self.global_steps))
+
+    def _load_checkpoint(self):
+        """Restore actor/critic weights, the global step and the dataloader state.
+
+        The checkpoint folder is chosen by ``trainer.resume_mode``:
+
+        * ``"disable"``: nothing is loaded.
+        * ``"auto"``: the latest checkpoint under ``trainer.default_local_dir``
+          is used; if there is none, training starts from scratch.
+        * ``"resume_path"``: ``trainer.resume_from_path`` is used.
+
+        Relative paths are resolved against the current working directory.
+        ``self.global_steps`` is set from the number following
+        ``global_step_`` in the folder path.
+
+        Returns:
+            ``0`` when nothing is loaded (resume disabled, or auto mode without
+            a checkpoint); ``None`` after a checkpoint has been loaded.
+
+        Raises:
+            NotImplementedError: If ``trainer.default_hdfs_dir`` is set.
+            AssertionError: If ``resume_from_path`` is not a string containing
+                ``global_step_`` in ``"resume_path"`` mode.
+        """
+        if self.config.trainer.resume_mode == "disable":
+            return 0
+
+        # load from hdfs
+        if self.config.trainer.default_hdfs_dir is not None:
+            raise NotImplementedError("load from hdfs is not implemented yet")
+        else:
+            checkpoint_folder = self.config.trainer.default_local_dir  # TODO: check path
+            if not os.path.isabs(checkpoint_folder):
+                working_dir = os.getcwd()
+                checkpoint_folder = os.path.join(working_dir, checkpoint_folder)
+            global_step_folder = find_latest_ckpt_path(checkpoint_folder)  # None if no latest
+
+        # find global_step_folder
+        if self.config.trainer.resume_mode == "auto":
+            if global_step_folder is None:
+                print("Training from scratch")
+                return 0
+        elif self.config.trainer.resume_mode == "resume_path":
+            assert isinstance(self.config.trainer.resume_from_path, str), "resume ckpt must be str type"
+            assert "global_step_" in self.config.trainer.resume_from_path, "resume ckpt must specify the global_steps"
+            # Drop trailing path separators: with ".../global_step_100/" the step
+            # text parsed below would be "100/" and int() would raise ValueError.
+            global_step_folder = self.config.trainer.resume_from_path.rstrip("/" + os.sep)
+            if not os.path.isabs(global_step_folder):
+                working_dir = os.getcwd()
+                global_step_folder = os.path.join(working_dir, global_step_folder)
+        print(f"Load from checkpoint folder: {global_step_folder}")
+        # set global step from the text after the last "global_step_" in the path
+        self.global_steps = int(global_step_folder.split("global_step_")[-1])
+
+        print(f"Setting global step to {self.global_steps}")
+        print(f"Resuming from {global_step_folder}")
+
+        actor_path = os.path.join(global_step_folder, "actor")
+        critic_path = os.path.join(global_step_folder, "critic")
+        # load actor
+        self.actor_rollout_wg.load_checkpoint(actor_path, del_local_after_load=self.config.trainer.del_local_ckpt_after_load)
+        # load critic
+        if self.use_critic:
+            self.critic_wg.load_checkpoint(critic_path, del_local_after_load=self.config.trainer.del_local_ckpt_after_load)
+
+        # load dataloader,
+        # TODO: from remote not implemented yet
+        dataloader_local_path = os.path.join(global_step_folder, "data.pt")
+        if os.path.exists(dataloader_local_path):
+            # NOTE(security): weights_only=False unpickles arbitrary objects, so
+            # only checkpoints from a trusted source should be resumed here.
+            dataloader_state_dict = torch.load(dataloader_local_path, weights_only=False)
+            self.train_dataloader.load_state_dict(dataloader_state_dict)
+        else:
+            print(f"Warning: No dataloader state found at {dataloader_local_path}, will start from scratch")
+
+    def _balance_batch(self, batch: DataProto, metrics, logging_prefix="global_seqlen"):
+        """Reorder `batch` in place so that each dp rank gets a similar token total.
+
+        Per-sample lengths are the row sums of ``attention_mask``. The samples
+        are split into ``world_size`` equal-size partitions by
+        ``get_seqlen_balanced_partitions`` and the batch is reordered partition
+        by partition. The imbalance statistics returned by
+        ``log_seqlen_unbalance`` are merged into ``metrics`` under
+        ``logging_prefix``.
+        """
+        mask = batch.batch["attention_mask"]
+        num_samples = mask.shape[0]
+        seqlens = mask.view(num_samples, -1).sum(-1).tolist()  # (train_batch_size,)
+
+        dp_world_size = self.actor_rollout_wg.world_size
+        partitions = get_seqlen_balanced_partitions(seqlens, k_partitions=dp_world_size, equal_size=True)
+
+        # reorder based on index. The data will be automatically equally partitioned by dispatch function
+        flat_order = []
+        for partition in partitions:
+            flat_order.extend(partition)
+        batch.reorder(torch.tensor(flat_order))
+
+        metrics.update(log_seqlen_unbalance(seqlen_list=seqlens, partitions=partitions, prefix=logging_prefix))
+
+    def fit(self):
+        """
+        The training loop of PPO.
+        The driver process only need to call the compute functions of the worker group through RPC
+        to construct the PPO dataflow.
+        The light-weight advantage computation is done on the driver process.
+        """
+        from omegaconf import OmegaConf
+
+        from verl.utils.tracking import Tracking
+
+        logger = Tracking(
+            project_name=self.config.trainer.project_name,
+            experiment_name=self.config.trainer.experiment_name,
+            default_backend=self.config.trainer.logger,
+            config=OmegaConf.to_container(self.config, resolve=True),
+        )
+
+        self.global_steps = 0
+
+        # load checkpoint before doing anything
+        self._load_checkpoint()
+
+        # perform validation before training
+        # currently, we only support validation using the reward_function.
+        if self.val_reward_fn is not None and self.config.trainer.get("val_before_train", True):
+            val_metrics = self._validate()
+            assert val_metrics, f"{val_metrics=}"
+            pprint(f"Initial validation metrics: {val_metrics}")
+            logger.log(data=val_metrics, step=self.global_steps)
+            if self.config.trainer.get("val_only", False):
+                return
+
+        # add tqdm
+        progress_bar = tqdm(total=self.total_training_steps, initial=self.global_steps, desc="Training Progress")
+
+        # we start from step 1
+        self.global_steps += 1
+        last_val_metrics = None
+
+        for epoch in range(self.config.trainer.total_epochs):
+            for batch_dict in self.train_dataloader:
+                metrics = {}
+                timing_raw = {}
+                batch: DataProto = DataProto.from_single_dict(batch_dict)
+
+                # pop those keys for generation
+                batch_keys_to_pop = ["input_ids", "attention_mask", "position_ids"]
+                non_tensor_batch_keys_to_pop = ["raw_prompt_ids"]
+                if "multi_modal_data" in batch.non_tensor_batch:
+                    non_tensor_batch_keys_to_pop.append("multi_modal_data")
+                if "raw_prompt" in batch.non_tensor_batch:
+                    non_tensor_batch_keys_to_pop.append("raw_prompt")
+                if "tools_kwargs" in batch.non_tensor_batch:
+                    non_tensor_batch_keys_to_pop.append("tools_kwargs")
+                gen_batch = batch.pop(
+                    batch_keys=batch_keys_to_pop,
+                    non_tensor_batch_keys=non_tensor_batch_keys_to_pop,
+                )
+
+                is_last_step = self.global_steps >= self.total_training_steps
+
+                with _timer("step", timing_raw):
+                    # generate a batch
+                    with _timer("gen", timing_raw):
+                        if not self.async_rollout_mode:
+                            gen_batch_output = self.actor_rollout_wg.generate_sequences(gen_batch)
+                        else:
+                            self.async_rollout_manager.wake_up()
+                            gen_batch_output = self.async_rollout_manager.generate_sequences(gen_batch)
+                            self.async_rollout_manager.sleep()
+
+                    if self.config.algorithm.adv_estimator == AdvantageEstimator.REMAX:
+                        with _timer("gen_max", timing_raw):
+                            gen_baseline_batch = deepcopy(gen_batch)
+                            gen_baseline_batch.meta_info["do_sample"] = False
+                            gen_baseline_output = self.actor_rollout_wg.generate_sequences(gen_baseline_batch)
+
+                            batch = batch.union(gen_baseline_output)
+                            reward_baseline_tensor = self.reward_fn(batch)
+                            reward_baseline_tensor = reward_baseline_tensor.sum(dim=-1)
+
+                            batch.pop(batch_keys=list(gen_baseline_output.batch.keys()))
+
+                            batch.batch["reward_baselines"] = reward_baseline_tensor
+
+                            del gen_baseline_batch, gen_baseline_output
+
+                    batch.non_tensor_batch["uid"] = np.array([str(uuid.uuid4()) for _ in range(len(batch.batch))], dtype=object)
+                    # repeat to align with repeated responses in rollout
+                    batch = batch.repeat(repeat_times=self.config.actor_rollout_ref.rollout.n, interleave=True)
+                    batch = batch.union(gen_batch_output)
+
+                    batch.batch["response_mask"] = compute_response_mask(batch)
+                    # Balance the number of valid tokens across DP ranks.
+                    # NOTE: This usually changes the order of data in the `batch`,
+                    # which won't affect the advantage calculation (since it's based on uid),
+                    # but might affect the loss calculation (due to the change of mini-batching).
+                    # TODO: Decouple the DP balancing and mini-batching.
+                    if self.config.trainer.balance_batch:
+                        self._balance_batch(batch, metrics=metrics)
+
+                    # compute global_valid tokens
+                    batch.meta_info["global_token_num"] = torch.sum(batch.batch["attention_mask"], dim=-1).tolist()
+
+                    with _timer("reward", timing_raw):
+                        # compute reward model score
+                        if self.use_rm:
+                            reward_tensor = self.rm_wg.compute_rm_score(batch)
+                            batch = batch.union(reward_tensor)
+
+                        if self.config.reward_model.launch_reward_fn_async:
+                            future_reward = compute_reward_async.remote(batch, self.config, self.tokenizer)
+                        else:
+                            reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.reward_fn)
+
+                    # recompute old_log_probs
+                    with _timer("old_log_prob", timing_raw):
+                        old_log_prob = self.actor_rollout_wg.compute_log_prob(batch)
+                        entropys = old_log_prob.batch["entropys"]
+                        response_masks = batch.batch["response_mask"]
+                        loss_agg_mode = self.config.actor_rollout_ref.actor.loss_agg_mode
+                        entropy_loss = agg_loss(loss_mat=entropys, loss_mask=response_masks, loss_agg_mode=loss_agg_mode)
+                        old_log_prob_metrics = {"actor/entropy_loss": entropy_loss.detach().item()}
+                        metrics.update(old_log_prob_metrics)
+                        old_log_prob.batch.pop("entropys")
+                        batch = batch.union(old_log_prob)
+
+                        if "rollout_log_probs" in batch.batch.keys():
+                            # TODO: we may want to add diff of probs too.
+                            rollout_old_log_probs = batch.batch["rollout_log_probs"]
+                            actor_old_log_probs = batch.batch["old_log_probs"]
+                            attention_mask = batch.batch["attention_mask"]
+                            responses = batch.batch["responses"]
+                            response_length = responses.size(1)
+                            response_mask = attention_mask[:, -response_length:]
+                            
+                            print(f"====== {rollout_old_log_probs=}, {actor_old_log_probs=} ========")
+
+                            rollout_probs = torch.exp(rollout_old_log_probs)
+                            actor_probs = torch.exp(actor_old_log_probs)
+                            rollout_probs_diff = torch.abs(rollout_probs - actor_probs)
+                            rollout_probs_diff = torch.masked_select(rollout_probs_diff, response_mask.bool())
+                            rollout_probs_diff_max = torch.max(rollout_probs_diff)
+                            rollout_probs_diff_mean = torch.mean(rollout_probs_diff)
+                            rollout_probs_diff_std = torch.std(rollout_probs_diff)
+                            metrics.update(
+                                {
+                                    "training/rollout_probs_diff_max": rollout_probs_diff_max.detach().item(),
+                                    "training/rollout_probs_diff_mean": rollout_probs_diff_mean.detach().item(),
+                                    "training/rollout_probs_diff_std": rollout_probs_diff_std.detach().item(),
+                                }
+                            )
+
+                    if self.use_reference_policy:
+                        # compute reference log_prob
+                        with _timer("ref", timing_raw):
+                            if not self.ref_in_actor:
+                                ref_log_prob = self.ref_policy_wg.compute_ref_log_prob(batch)
+                            else:
+                                ref_log_prob = self.actor_rollout_wg.compute_ref_log_prob(batch)
+                            batch = batch.union(ref_log_prob)
+
+                    # compute values
+                    if self.use_critic:
+                        with _timer("values", timing_raw):
+                            values = self.critic_wg.compute_values(batch)
+                            batch = batch.union(values)
+
+                    with _timer("adv", timing_raw):
+                        # we combine with rule-based rm
+                        reward_extra_infos_dict: dict[str, list]
+                        if self.config.reward_model.launch_reward_fn_async:
+                            reward_tensor, reward_extra_infos_dict = ray.get(future_reward)
+                        batch.batch["token_level_scores"] = reward_tensor
+
+                        print(f"{list(reward_extra_infos_dict.keys())=}")
+                        if reward_extra_infos_dict:
+                            batch.non_tensor_batch.update({k: np.array(v) for k, v in reward_extra_infos_dict.items()})
+
+                        # compute rewards. apply_kl_penalty if available
+                        if self.config.algorithm.use_kl_in_reward:
+                            batch, kl_metrics = apply_kl_penalty(batch, kl_ctrl=self.kl_ctrl_in_reward, kl_penalty=self.config.algorithm.kl_penalty)
+                            metrics.update(kl_metrics)
+                        else:
+                            batch.batch["token_level_rewards"] = batch.batch["token_level_scores"]
+
+                        # compute advantages, executed on the driver process
+
+                        norm_adv_by_std_in_grpo = self.config.algorithm.get("norm_adv_by_std_in_grpo", True)  # GRPO adv normalization factor
+
+                        batch = compute_advantage(
+                            batch,
+                            adv_estimator=self.config.algorithm.adv_estimator,
+                            gamma=self.config.algorithm.gamma,
+                            lam=self.config.algorithm.lam,
+                            num_repeat=self.config.actor_rollout_ref.rollout.n,
+                            norm_adv_by_std_in_grpo=norm_adv_by_std_in_grpo,
+                            multi_turn=self.config.actor_rollout_ref.rollout.multi_turn.enable,
+                            use_pf_ppo=self.config.algorithm.use_pf_ppo,
+                            pf_ppo_reweight_method=self.config.algorithm.pf_ppo.reweight_method,
+                            pf_ppo_weight_pow=self.config.algorithm.pf_ppo.weight_pow,
+                        )
+
+                    # update critic
+                    if self.use_critic:
+                        with _timer("update_critic", timing_raw):
+                            critic_output = self.critic_wg.update_critic(batch)
+                        critic_output_metrics = reduce_metrics(critic_output.meta_info["metrics"])
+                        metrics.update(critic_output_metrics)
+
+                    # implement critic warmup
+                    if self.config.trainer.critic_warmup <= self.global_steps:
+                        # update actor
+                        with _timer("update_actor", timing_raw):
+                            batch.meta_info["multi_turn"] = self.config.actor_rollout_ref.rollout.multi_turn.enable
+                            actor_output = self.actor_rollout_wg.update_actor(batch)
+                        actor_output_metrics = reduce_metrics(actor_output.meta_info["metrics"])
+                        metrics.update(actor_output_metrics)
+
+                    # Log rollout generations if enabled
+                    rollout_data_dir = self.config.trainer.get("rollout_data_dir", None)
+                    if rollout_data_dir:
+                        with _timer("dump_rollout_generations", timing_raw):
+                            print(batch.batch.keys())
+                            inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
+                            outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
+                            scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
+                            self._dump_generations(
+                                inputs=inputs,
+                                outputs=outputs,
+                                scores=scores,
+                                reward_extra_infos_dict=reward_extra_infos_dict,
+                                dump_path=rollout_data_dir,
+                            )
+
+                    # validate
+                    if self.val_reward_fn is not None and self.config.trainer.test_freq > 0 and (is_last_step or self.global_steps % self.config.trainer.test_freq == 0):
+                        with _timer("testing", timing_raw):
+                            val_metrics: dict = self._validate()
+                            if is_last_step:
+                                last_val_metrics = val_metrics
+                        metrics.update(val_metrics)
+
+                    if self.config.trainer.save_freq > 0 and (is_last_step or self.global_steps % self.config.trainer.save_freq == 0):
+                        with _timer("save_checkpoint", timing_raw):
+                            self._save_checkpoint()
+
+                # training metrics
+                metrics.update(
+                    {
+                        "training/global_step": self.global_steps,
+                        "training/epoch": epoch,
+                    }
+                )
+                # collect metrics
+                metrics.update(compute_data_metrics(batch=batch, use_critic=self.use_critic))
+                metrics.update(compute_timing_metrics(batch=batch, timing_raw=timing_raw))
+                # TODO: implement actual tflpo and theoretical tflpo
+                n_gpus = self.resource_pool_manager.get_n_gpus()
+                metrics.update(compute_throughout_metrics(batch=batch, timing_raw=timing_raw, n_gpus=n_gpus))
+
+                # TODO: make a canonical logger that supports various backend
+                logger.log(data=metrics, step=self.global_steps)
+
+                progress_bar.update(1)
+                self.global_steps += 1
+                if is_last_step:
+                    pprint(f"Final validation metrics: {last_val_metrics}")
+                    progress_bar.close()
+                    return
diff --git a/recipe/moe/moe_trainer/ppo/reward.py b/recipe/moe/moe_trainer/ppo/reward.py
new file mode 100644
index 00000000000..7f6910ef35f
--- /dev/null
+++ b/recipe/moe/moe_trainer/ppo/reward.py
@@ -0,0 +1,131 @@
+# Copyright 2025 Individual Contributor: Thibaut Barroyer
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import multiprocessing
+import os
+from functools import partial
+
+import ray
+
+from verl import DataProto
+from verl.utils.reward_score import default_compute_score
+
+
+def get_custom_reward_fn(config):
+    """Return a wrapper around the configured ``custom_reward_function`` (None if no ``path`` is set).
+
+    Raises FileNotFoundError / RuntimeError / AttributeError for a missing file, failed load or bad ``name``.
+    """
+    import importlib.util
+    import sys
+    reward_fn_config = config.get("custom_reward_function") or {}
+    file_path = reward_fn_config.get("path")
+    if not file_path:
+        return None
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Reward function file '{file_path}' not found.")
+
+    spec = importlib.util.spec_from_file_location("custom_module", file_path)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules["custom_module"] = module  # registered before execution, as before
+    try:
+        spec.loader.exec_module(module)
+    except Exception as e:
+        sys.modules.pop("custom_module", None)  # do not leave a half-initialised module registered
+        raise RuntimeError(f"Error loading module from '{file_path}': {e}") from e
+
+    function_name = reward_fn_config.get("name")
+    if not function_name or not hasattr(module, function_name):  # unset name: hasattr(module, None) is a TypeError
+        raise AttributeError(f"Reward function '{function_name}' not found in '{file_path}'.")
+    print(f"using customized reward function '{function_name}' from '{file_path}'")
+    raw_fn = getattr(module, function_name)
+    reward_kwargs = dict(reward_fn_config.get("reward_kwargs") or {})  # tolerate an explicit null
+
+    def wrapped_fn(*args, **kwargs):  # configured reward_kwargs are appended to every call
+        return raw_fn(*args, **kwargs, **reward_kwargs)
+    return wrapped_fn
+
+
+def load_reward_manager(config, tokenizer, num_examine, **reward_kwargs):
+    """Build the reward manager selected by ``config.reward_model.reward_manager``.
+
+    ``tokenizer``, ``num_examine`` and ``**reward_kwargs`` are forwarded to the manager's
+    constructor; the score function is the custom one if configured, else the default
+    scorer (bound to the sandbox URL and a concurrency semaphore when a URL is set).
+
+    Raises:
+        NotImplementedError: If the name is not naive, prime, batch or dapo.
+    """
+    reward_manager_name = config.reward_model.get("reward_manager", "naive")
+    if reward_manager_name == "naive":
+        from verl.workers.reward_manager import NaiveRewardManager as reward_manager_cls
+    elif reward_manager_name == "prime":
+        from verl.workers.reward_manager import PrimeRewardManager as reward_manager_cls
+    elif reward_manager_name == "batch":
+        from verl.workers.reward_manager import BatchRewardManager as reward_manager_cls
+    elif reward_manager_name == "dapo":
+        from verl.workers.reward_manager import DAPORewardManager as reward_manager_cls
+    else:
+        raise NotImplementedError(f"Unknown reward manager '{reward_manager_name}'; expected one of: naive, prime, batch, dapo.")
+
+    # A user-supplied scoring function takes precedence over the default scorer.
+    final_compute_score = get_custom_reward_fn(config)
+    if final_compute_score is None:
+        sandbox_config = config.reward_model.get("sandbox_fusion")
+        sandbox_url = sandbox_config.get("url") if sandbox_config else None
+        if sandbox_url:
+            sandbox_manager = multiprocessing.Manager()
+            _concurrent_semaphore = sandbox_manager.Semaphore(sandbox_config.get("max_concurrent", 64))
+            final_compute_score = partial(default_compute_score, sandbox_fusion_url=sandbox_url, concurrent_semaphore=_concurrent_semaphore)
+        else:
+            final_compute_score = default_compute_score
+
+    return reward_manager_cls(
+        tokenizer=tokenizer,
+        num_examine=num_examine,
+        compute_score=final_compute_score,
+        reward_fn_key=config.data.reward_fn_key,
+        **reward_kwargs,
+    )
+
+
+def compute_reward(data: DataProto, reward_fn):
+    """
+    Compute the reward for a batch, preferring the dict-returning form of ``reward_fn``.
+    Args:
+        data: DataProto batch passed to ``reward_fn``.
+        reward_fn: Callable invoked as ``reward_fn(data, return_dict=True)``, then ``reward_fn(data)`` on failure.
+    Returns:
+        Tuple ``(reward_tensor, reward_extra_infos_dict)``; the dict is empty after a fallback.
+    """
+    try:
+        reward_result = reward_fn(data, return_dict=True)
+        reward_tensor = reward_result["reward_tensor"]
+        reward_extra_infos_dict = reward_result["reward_extra_info"]
+    except Exception as e:  # any failure above (call or key lookup) triggers the plain-call fallback
+        print(f"Error in reward_fn: {e}")
+        reward_tensor = reward_fn(data)  # NOTE(review): this calls reward_fn a second time
+        reward_extra_infos_dict = {}
+
+    return reward_tensor, reward_extra_infos_dict
+
+
+@ray.remote(num_cpus=1)
+def compute_reward_async(data: DataProto, config, tokenizer):
+    """Ray task (1 CPU): build a reward manager from ``config`` and score ``data`` with it.
+
+    Returns whatever ``compute_reward`` returns for the freshly loaded manager.
+    """
+    manager_kwargs = config.reward_model.get("reward_kwargs", {})
+    return compute_reward(data, load_reward_manager(config, tokenizer, num_examine=0, **manager_kwargs))
diff --git a/recipe/moe/moe_trainer/runtime_env.yaml b/recipe/moe/moe_trainer/runtime_env.yaml
new file mode 100644
index 00000000000..5aa693cd71a
--- /dev/null
+++ b/recipe/moe/moe_trainer/runtime_env.yaml
@@ -0,0 +1,6 @@
+working_dir: ./
+excludes: ["/.git/"]
+env_vars:
+  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
+  # If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
+  # VLLM_ATTENTION_BACKEND: "XFORMERS"
\ No newline at end of file
diff --git a/recipe/moe/scripts/run_xdg3_debug.sh b/recipe/moe/scripts/run_xdg3_debug.sh
new file mode 100644
index 00000000000..d3528f17ddb
--- /dev/null
+++ b/recipe/moe/scripts/run_xdg3_debug.sh
@@ -0,0 +1,35 @@
+set -x
+nproc_per_node=8
+save_path="/tmp"
+
+# WORLD_SIZE (node count) and RANK (this node's index) are read from the environment.
+# Any extra command-line arguments are forwarded verbatim to the trainer.
+echo "$WORLD_SIZE" "$RANK"
+# export CUDA_LAUNCH_BLOCKING=1
+# export TORCH_SHOW_CPP_STACKTRACES=1
+
+# /cpfs/user/liuyanjiang/Eng/verl-dpskv2/data/sft_debug.parquet
+# /cpfs/user/sunzekai/general_alignment/moe_sft/useful_moe_145b/moe_sft_145b_32k_v7.2.0_CIF/iter_0001000_hf
+torchrun --nnodes="$WORLD_SIZE" --nproc_per_node="$nproc_per_node" --node_rank="$RANK"  \
+     -m moe_trainer.fsdp_sft_trainer \
+    data.train_files=/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/debug_data/sft.json \
+    data.val_files=/cpfs/user/liuyanjiang/research/deepscaler/deepscaler/data/train/still.json \
+    data.prompt_key=problem \
+    data.response_key=answer \
+    data.micro_batch_size_per_gpu=1 \
+    data.max_length=1024 \
+    data.truncation=right \
+    model.fsdp_config.model_dtype=bf16 \
+    model.strategy=fsdp \
+    model.trust_remote_code=True \
+    optim.warmup_steps_ratio=0.05 \
+    optim.lr=5e-6 \
+    optim.weight_decay=0.1 \
+    model.enable_gradient_checkpointing=True \
+    model.partial_pretrain=/cpfs/user/liuyanjiang/hf_models/moe_sft_145b_32k_v7-2-0_CIF_iter_0001589_hf \
+    trainer.default_local_dir="$save_path" \
+    trainer.project_name=gsm8k-sft \
+    trainer.experiment_name=cybertron_sft_debug \
+    trainer.total_epochs=4 \
+    trainer.logger=['console'] \
+    trainer.default_hdfs_dir=null "$@"
\ No newline at end of file
diff --git a/recipe/moe/scripts/run_xdg3_grpo_debug.sh b/recipe/moe/scripts/run_xdg3_grpo_debug.sh
new file mode 100644
index 00000000000..7622b5e9894
--- /dev/null
+++ b/recipe/moe/scripts/run_xdg3_grpo_debug.sh
@@ -0,0 +1,40 @@
+# GRPO debug run via verl.trainer.main_ppo on one 8-GPU node; extra arguments are forwarded verbatim.
+python3 -m verl.trainer.main_ppo \
+    algorithm.adv_estimator=grpo \
+    data.train_files=/cpfs/user/lizichao/RedMOE-verl-new/dataset/train_ifeval_like_3w_v1.parquet \
+    data.val_files=/cpfs/user/lizichao/RedMOE-verl-new/dataset/test_ifeval_benchmark.parquet \
+    data.train_batch_size=128 \
+    data.max_prompt_length=512 \
+    data.max_response_length=512 \
+    data.filter_overlong_prompts=True \
+    data.truncation='error' \
+    data.trust_remote_code=True \
+    actor_rollout_ref.model.path=/cpfs/user/liuyanjiang/hf_models/moe_sft_145b_32k_v7.2.0_CIF_iter_0001589_hf-3layers \
+    actor_rollout_ref.actor.optim.lr=1e-6 \
+    actor_rollout_ref.model.use_remove_padding=True \
+    actor_rollout_ref.actor.ppo_mini_batch_size=128 \
+    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \
+    actor_rollout_ref.actor.use_kl_loss=True \
+    actor_rollout_ref.actor.kl_loss_coef=0.001 \
+    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
+    actor_rollout_ref.actor.entropy_coeff=0 \
+    actor_rollout_ref.model.enable_gradient_checkpointing=True \
+    actor_rollout_ref.actor.fsdp_config.param_offload=False \
+    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
+    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=40 \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
+    actor_rollout_ref.rollout.name=vllm \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
+    actor_rollout_ref.rollout.n=8 \
+    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=40 \
+    actor_rollout_ref.ref.fsdp_config.param_offload=True \
+    algorithm.use_kl_in_reward=False \
+    trainer.critic_warmup=0 \
+    trainer.logger=['console'] \
+    trainer.project_name='verl_grpo_example_gsm8k' \
+    trainer.experiment_name='qwen2_7b_function_rm' \
+    trainer.n_gpus_per_node=8 \
+    trainer.nnodes=1 \
+    trainer.save_freq=20 \
+    trainer.test_freq=5 \
+    trainer.total_epochs=15 "$@"
\ No newline at end of file
diff --git a/recipe/moe/scripts/run_xdg_debug.sh b/recipe/moe/scripts/run_xdg_debug.sh
new file mode 100644
index 00000000000..9f254455ab1
--- /dev/null
+++ b/recipe/moe/scripts/run_xdg_debug.sh
@@ -0,0 +1,35 @@
+set -x
+nproc_per_node=8
+save_path="/tmp"
+export HYDRA_FULL_ERROR=1
+
+# WORLD_SIZE (node count) and RANK (this node's index) are read from the environment.
+# Any extra command-line arguments are forwarded verbatim to the trainer.
+echo "$WORLD_SIZE" "$RANK"
+# /cpfs/user/liuyanjiang/Eng/verl-dpskv2/data/sft_debug.parquet
+# /cpfs/user/sunzekai/general_alignment/moe_sft/useful_moe_145b/moe_sft_145b_32k_v7.2.0_CIF/iter_0001000_hf
+torchrun --nnodes="$WORLD_SIZE" --nproc_per_node="$nproc_per_node" --node_rank="$RANK"  \
+     -m moe_trainer.fsdp_sft_trainer \
+    data.train_files=/cpfs/user/liuyanjiang/Eng/verl/examples/data/light-r1-stage1-v0.0.2.parquet \
+    data.val_files=/cpfs/user/liuyanjiang/research/deepscaler/deepscaler/data/train/still.json \
+    data.prompt_key=problem \
+    data.response_key=answer \
+    data.micro_batch_size_per_gpu=1 \
+    data.max_length=8192 \
+    data.truncation=right \
+    model.fsdp_config.model_dtype=bf16 \
+    model.strategy=fsdp \
+    model.fsdp_config.cpu_offload=True \
+    model.fsdp_config.offload_params=True \
+    model.trust_remote_code=True \
+    optim.warmup_steps_ratio=0.05 \
+    optim.lr=5e-6 \
+    optim.weight_decay=0.1 \
+    model.enable_gradient_checkpointing=True \
+    model.partial_pretrain=/cpfs/user/liuyanjiang/hf_models/moe_sft_145b_32k_v7-2-0_CIF_iter_0001589_hf \
+    trainer.default_local_dir="$save_path" \
+    trainer.project_name=gsm8k-sft \
+    trainer.experiment_name=cybertron_sft_debug \
+    trainer.total_epochs=4 \
+    trainer.logger=['console'] \
+    trainer.default_hdfs_dir=null "$@"
\ No newline at end of file
diff --git a/recipe/moe/test/inputs.pkl b/recipe/moe/test/inputs.pkl
new file mode 100644
index 00000000000..10f5f654792
Binary files /dev/null and b/recipe/moe/test/inputs.pkl differ
diff --git a/recipe/moe/test/test_check_dump.ipynb b/recipe/moe/test/test_check_dump.ipynb
new file mode 100644
index 00000000000..83a73e24045
--- /dev/null
+++ b/recipe/moe/test/test_check_dump.ipynb
@@ -0,0 +1,232 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "22ce1a04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import os\n",
+    "from glob import glob\n",
+    "from tqdm import tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 383,
+   "id": "4fa1f403",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hf = glob('/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir_2/*')\n",
+    "verl = glob('/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir/*')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 384,
+   "id": "ddcad368",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hf_layer0 = [i for i in hf if 'layers.1' in i and 'mbs1' in i]\n",
+    "verl_layer0 = [i for i in verl if 'layers.1' in i]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 385,
+   "id": "09981815",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hf_layer0_input = [i for i in hf_layer0 if 'input' in i]\n",
+    "verl_layer0_input = [i for i in verl_layer0 if 'input' in i]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 386,
+   "id": "26bc70bd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir/model.layers.1.mlp.gate.inspect2-iter-mbs0-forward-input.pt',\n",
+       " '/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir/model.layers.1.mlp.inspect_param-iter-mbs0-forward-input.pt',\n",
+       " '/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir/model.layers.1.mlp.inspect_param2-iter-mbs0-forward-input.pt',\n",
+       " '/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir/model.layers.1.mlp.gate.inspect-iter-mbs0-forward-input.pt']"
+      ]
+     },
+     "execution_count": 386,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[i for i in verl_layer0_input if 'insp' in i]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 448,
+   "id": "f380aac4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hf_a = torch.load('/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir_2/model.layers.2.mlp.gate-iter-mbs0-forward-output.pt', map_location='cpu', weights_only=False)\n",
+    "verl_a = torch.load('/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir/model.layers.2.mlp.gate-iter-mbs0-forward-output.pt', map_location='cpu', weights_only=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 449,
+   "id": "9030da56",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 449,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.allclose(hf_a[0], verl_a[0], rtol=1e-5, atol=1e-8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 450,
+   "id": "5012e273",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([[ 12,  24,  51,  71,  83,  86],\n",
+       "         [ 14,  23,  24,  37, 114,  74],\n",
+       "         [  3,  23,  37, 107, 123,  29],\n",
+       "         ...,\n",
+       "         [ 17,  36,  38,  94,  45,  49],\n",
+       "         [ 17,  36,  38,  94,  45,  49],\n",
+       "         [ 17,  36,  38,  94,  45,  49]]),\n",
+       " tensor([[0.4184, 0.3977, 0.4881, 0.3874, 0.4390, 0.3693],\n",
+       "         [0.4302, 0.4012, 0.4651, 0.4041, 0.4012, 0.3983],\n",
+       "         [0.4146, 0.4146, 0.4114, 0.4335, 0.4146, 0.4114],\n",
+       "         ...,\n",
+       "         [0.4186, 0.4153, 0.4169, 0.4186, 0.4169, 0.4137],\n",
+       "         [0.4186, 0.4153, 0.4169, 0.4186, 0.4169, 0.4137],\n",
+       "         [0.4186, 0.4153, 0.4169, 0.4186, 0.4169, 0.4137]], requires_grad=True),\n",
+       " None)"
+      ]
+     },
+     "execution_count": 450,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "verl_a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 451,
+   "id": "a07e177c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([[ 12,  24,  51,  71,  83,  86],\n",
+       "         [ 14,  23,  24,  37, 114,  74],\n",
+       "         [  3,  23,  37, 107, 123,  29],\n",
+       "         ...,\n",
+       "         [ 17,  36,  38,  94,  45,  49],\n",
+       "         [ 17,  36,  38,  94,  45,  49],\n",
+       "         [ 17,  36,  38,  94,  45,  49]]),\n",
+       " tensor([[0.4184, 0.3977, 0.4881, 0.3874, 0.4390, 0.3693],\n",
+       "         [0.4302, 0.4012, 0.4651, 0.4041, 0.4012, 0.3983],\n",
+       "         [0.4146, 0.4146, 0.4114, 0.4335, 0.4146, 0.4114],\n",
+       "         ...,\n",
+       "         [0.4186, 0.4153, 0.4169, 0.4186, 0.4169, 0.4137],\n",
+       "         [0.4186, 0.4153, 0.4169, 0.4186, 0.4169, 0.4137],\n",
+       "         [0.4186, 0.4153, 0.4169, 0.4186, 0.4169, 0.4137]], requires_grad=True),\n",
+       " None)"
+      ]
+     },
+     "execution_count": 451,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hf_a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 178,
+   "id": "f081cb9d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 17,  46,  51,  68,  82,  14],\n",
+       "        [ 12,  48,  86,  98, 105,  90],\n",
+       "        [  4,  90, 112, 121,   5,   8],\n",
+       "        ...,\n",
+       "        [  9,  17,  51,  75,   7,  14],\n",
+       "        [  9,  17,  51,  75,   7,  14],\n",
+       "        [  9,  17,  51,  75,   7,  14]])"
+      ]
+     },
+     "execution_count": 178,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "verl_a[0][1024:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "837b3d0f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "agi-verl",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/recipe/moe/test/test_forward_check.py b/recipe/moe/test/test_forward_check.py
new file mode 100644
index 00000000000..90b0b6bdf14
--- /dev/null
+++ b/recipe/moe/test/test_forward_check.py
@@ -0,0 +1,30 @@
+"""Run one forward pass of the HF model with bitdump hooks attached and print the logits."""
+import pickle as pkl
+import sys
+
+import torch
+
+sys.path.insert(0, '/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe')
+from moe_trainer.bitdump import hook_fwd_bwd_to_module  # noqa: E402  (needs the sys.path entry above)
+from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: E402
+
+MODEL_PATH = '/cpfs/user/liuyanjiang/hf_models/moe_sft_145b_32k_v7.2.0_CIF_iter_0001589_hf-3layers'
+DUMP_PATH = '/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/moe_trainer/dump_dir_2'
+INPUTS_PATH = '/newcpfs/user/liuyanjiang/Eng/agi-verl/recipe/moe/test/inputs.pkl'
+
+model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
+model.to(torch.device('cuda'))
+model.train()
+hook_fwd_bwd_to_module(model, names=None, prefix=f"{DUMP_PATH}/")
+
+# NOTE: pickle executes arbitrary code on load; only use the inputs.pkl shipped with this test.
+with open(INPUTS_PATH, 'rb') as f:
+    inputs = pkl.load(f)
+inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+with torch.autocast("cuda", dtype=torch.bfloat16):
+    output = model(**inputs)
+
+# NumPy has no bfloat16 dtype, so upcast to float32 before converting.
+print(output['logits'].detach().float().cpu().numpy())
\ No newline at end of file
diff --git a/verl/__init__.py b/verl/__init__.py
index 593f3dc6144..11fca9843c4 100644
--- a/verl/__init__.py
+++ b/verl/__init__.py
@@ -30,7 +30,7 @@
     __version__ = f.read().strip()
 
 
-set_basic_config(level=logging.WARNING)
+set_basic_config(level=logging.WARNING)  # library default stays WARNING; enable DEBUG per run, not on import
 
 
 __all__ = ["DataProto", "__version__"]
diff --git a/verl/models/mcore/config_converter.py b/verl/models/mcore/config_converter.py
index 9daf550cdb8..00ff00bf764 100644
--- a/verl/models/mcore/config_converter.py
+++ b/verl/models/mcore/config_converter.py
@@ -383,3 +383,37 @@ def hf_to_mcore_config_llama4(
 ) -> TransformerConfig:
     # Llama4ForConditionalGeneration
     raise NotImplementedError("Llama4ForConditionalGeneration is not supported yet")
+
+
+def hf_to_mcore_config_xdgmoe(hf_config: PretrainedConfig, dtype: torch.dtype, **override_transformer_config_kwargs) -> TransformerConfig:
+    """Convert an HF config into a cybertron ``DeepseekV2TransformerConfig`` (xdgmoe variant)."""
+    from cybertron.models.deepseek_v2.configure_deepseekv2 import DeepseekV2TransformerConfig
+    args = _get_base_transformer_config(
+        hf_config=hf_config,
+        dtype=dtype,
+        use_cpu_initialization=False,
+        add_bias_linear=False,
+        layernorm_epsilon=hf_config.rms_norm_eps,
+        max_position_embeddings=hf_config.max_position_embeddings,
+        # MoE specific
+        moe_ffn_hidden_size=hf_config.moe_intermediate_size,
+        moe_router_topk=hf_config.num_experts_per_tok,
+        num_moe_experts=hf_config.n_routed_experts,
+        moe_shared_expert_intermediate_size=hf_config.moe_intermediate_size * hf_config.n_shared_experts,
+        first_k_dense_replace=hf_config.first_k_dense_replace,
+        routed_scaling_factor=hf_config.routed_scaling_factor,
+        use_router_expert_score_correction=True,
+        # moe_aux_loss_coeff is deliberately not passed here (see the load-balancing note below)
+        moe_router_load_balancing_type="greedy",  # turn off aux_loss as it hurts perf in RL
+        moe_grouped_gemm=True,
+        router_scoring_func="sigmoid",
+        # Other optimizations
+        persist_layer_norm=True,
+        bias_activation_fusion=True,
+        bias_dropout_fusion=True,
+        # Attention: QK layernorm on, multi-latent attention off
+        qk_layernorm=True,
+        multi_latent_attention=False,
+        **override_transformer_config_kwargs,
+    )
+    return DeepseekV2TransformerConfig(**args)
diff --git a/verl/models/mcore/hf_mcore_loader.py b/verl/models/mcore/hf_mcore_loader.py
new file mode 100644
index 00000000000..5611313bc71
--- /dev/null
+++ b/verl/models/mcore/hf_mcore_loader.py
@@ -0,0 +1,449 @@
+import json
+import os
+import re
+from collections import defaultdict
+
+import torch
+from safetensors import safe_open
+from tqdm import tqdm
+
+
+class HfMcoreManager:
+    """HF -> mcore parameter-name mapping and tensor-parallel split rules."""
+
+    def __init__(self):
+        # Name fragments used when fusing q/k/v and gate/up projections.
+        self.QKV_FUSE_NAME = ".linear_qkv."
+        self.GATE_UP_FUSE_NAME = ".linear_fc1."
+        self.UP_NAME = ".up_proj."
+        self.Q_NAME = ".q_proj."
+
+        # Substring substitutions from HF names to mcore names. They are applied
+        # one after another in insertion order, so the order below matters.
+        self.REPLACE_DICT = {
+            "up_proj.": "linear_fc1.",
+            "down_proj.": "linear_fc2.",
+            "model.embed_tokens.": "embedding.word_embeddings.",
+            "model.layers.": "decoder.layers.",
+            "input_layernorm.weight": "self_attention.linear_qkv.layer_norm_weight",
+            "self_attn.q_proj.": "self_attention.linear_q.",
+            "self_attn.k_proj.": "self_attention.linear_k.",
+            "self_attn.v_proj.": "self_attention.linear_v.",
+            "self_attn.o_proj.": "self_attention.linear_proj.",
+            "self_attn.q_layernorm.": "self_attention.q_layernorm.",
+            "self_attn.k_layernorm.": "self_attention.k_layernorm.",
+            "self_attn.kv_layernorm.": "self_attention.kv_layernorm.",
+            "self_attn.q_a_proj.": "self_attention.linear_q_down_proj.",
+            "self_attn.q_b_proj.": "self_attention.linear_q_up_proj.",
+            "self_attn.kv_a_proj_with_mqa.": "self_attention.linear_kv_down_proj.",
+            "self_attn.kv_b_proj.": "self_attention.linear_kv_up_proj.",
+            "post_attention_layernorm.": "pre_mlp_layernorm.",
+            ".gate_proj.": ".linear_gate.",
+            ".gate.": ".router.",
+            "model.norm.": "decoder.final_layernorm.",
+            "lm_head.": "output_layer.",
+        }
+
+        # name fragment -> (is_split, split_dim) for tensor-parallel slicing.
+        self.split_mode_dict = {
+            "self_attn.o_proj": (True, 1),  # RowParallel
+            "down_proj": (True, 1),  # RowParallel
+            "linear_q_up_proj": (True, 0),
+            "self_attn.q_proj": (True, 0),
+            "lm_head": (True, 0),
+            "embed_tokens": (True, 0),
+            "gate_proj": (True, 0),
+            "up_proj": (True, 0)
+        }
+
+    def is_qkv_fusion(self, layer_name):
+        """Return True when ``layer_name`` contains a q_proj weight or bias."""
+        return any(self.Q_NAME + kind in layer_name for kind in ("weight", "bias"))
+
+    def hf_to_mcore_name(self, layer_name):
+        """Apply every ``REPLACE_DICT`` substitution, in order, to ``layer_name``."""
+        converted = layer_name
+        for hf_fragment in self.REPLACE_DICT:
+            converted = converted.replace(hf_fragment, self.REPLACE_DICT[hf_fragment])
+        return converted
+
+    def get_split_mode(self, layer_name):
+        """
+        Look up the tensor-parallel split rule for a layer name.
+
+        Args:
+            layer_name (str): The name of the layer to check.
+
+        Returns:
+            tuple: (is_split (bool), split_dim (int)) of the first entry in
+                ``split_mode_dict`` whose key, followed by ``.weight`` or
+                ``.bias``, occurs in ``layer_name``; ``(False, 0)`` if none does.
+        """
+        for pattern, rule in self.split_mode_dict.items():
+            if pattern + ".weight" in layer_name or pattern + ".bias" in layer_name:
+                return rule
+        return (False, 0)
+
+
+class DeepseekV2HfLoader(HfMcoreManager):
+    def __init__(
+        self,
+        config,
+        model_config=None,
+        tp_size=None,
+        tp_rank=None,
+        pp_size=None,
+        pp_rank=None,
+        ep_size=None,
+        ep_rank=None,
+    ):
+        """Index the checkpoint's parameter names and cache model/parallel settings.
+
+        Args:
+            config: Object exposing ``model.path`` (checkpoint directory) and
+                ``actor.megatron`` (optionally carrying
+                ``first_pipeline_num_layers`` / ``last_pipeline_num_layers``).
+            model_config: HF model config. It defaults to ``None`` in the
+                signature but is dereferenced unconditionally, so it is
+                effectively required.
+            tp_size, tp_rank: Size/rank used to slice tensors in ``_slice_mp``.
+            pp_size, pp_rank: Size/rank used to pick this rank's decoder layers.
+            ep_size, ep_rank: Size/rank used to pick this rank's routed experts.
+
+        Raises:
+            AssertionError: If the weight map contains a parameter name that is
+                not an embedding, decoder layer, final norm or ``lm_head.weight``.
+        """
+        super().__init__()
+        self.model_config = model_config
+        self.model_path = config.model.path
+
+        index_map_path = os.path.join(self.model_path, "model.safetensors.index.json")
+
+        if os.path.exists(index_map_path):
+            # Sharded checkpoint: the index maps parameter name -> shard file.
+            with open(index_map_path, encoding="utf-8") as f:
+                weight_mapping = json.load(f)["weight_map"]
+        else:
+            # Single-file checkpoint: every parameter lives in model.safetensors.
+            model_tensor_path = os.path.join(self.model_path, "model.safetensors")
+            with safe_open(model_tensor_path, framework="pt") as f:
+                weight_mapping = {key: "model.safetensors" for key in f.keys()}
+
+        # Group HF parameter names by the unit they are loaded with. Keys are
+        # "embedding", "final_norm", "lm_head" or an integer decoder-layer index.
+        layer_to_params = defaultdict(set)
+        for k in weight_mapping:
+            if "model.embed_tokens" in k:
+                layer_to_params["embedding"].add(k)
+            elif "model.norm" in k:
+                # final rms norm
+                layer_to_params["final_norm"].add(k)
+            elif "model.layers." in k:
+                layer = int(k.split(".")[2])
+                layer_to_params[layer].add(k)
+            elif "lm_head.weight" in k:
+                layer_to_params["lm_head"].add(k)
+            else:
+                # Explicit raise instead of `assert False`, which `python -O`
+                # would strip and thereby silently ignore unknown parameters.
+                raise AssertionError(k)
+
+        self.hf_architecture = self.model_config.architectures[0]
+        self.untie_embeddings_and_output_weights = not self.model_config.tie_word_embeddings
+        self._weight_mapping = weight_mapping
+        self._layer_to_params = layer_to_params
+        self._tp_size = tp_size
+        self._tp_rank = tp_rank
+        self._pp_size = pp_size
+        self._pp_rank = pp_rank
+        self._ep_size = ep_size
+        self._ep_rank = ep_rank
+        self._num_layers = self.model_config.num_hidden_layers
+        self._head_num = self.model_config.num_attention_heads
+        self._num_query_groups = self.model_config.num_key_value_heads
+        self._hidden_size = self.model_config.hidden_size
+        self._ffn_hidden_size = self.model_config.intermediate_size
+        self._qk_layernorm = self.model_config.qk_layernorm
+
+        self._padded_vocab_size = self.model_config.vocab_size
+        self._num_experts = self.model_config.n_routed_experts
+        self._moe_ffn_hidden_size = self.model_config.moe_intermediate_size
+        self._moe_shared_expert_intermediate_size = self.model_config.n_shared_experts * self.model_config.moe_intermediate_size
+
+        # Per-head width, used as the k/v group size when fusing q/k/v.
+        self._kv_channel = self._hidden_size // self._head_num
+
+        # Optional uneven pipeline split; None means "not configured".
+        self._first_pipeline_num_layers = getattr(config.actor.megatron, "first_pipeline_num_layers", None)
+        self._last_pipeline_num_layers = getattr(config.actor.megatron, "last_pipeline_num_layers", None)
+
+        self._multi_latent_attention = getattr(self.model_config, "multi_latent_attention", False)
+
+        self._fuse_up_gate = True
+        self._moe_grouped_gemm = True
+
+    def adjust_mapping_table(self):
+        """Specialise ``REPLACE_DICT`` / ``split_mode_dict`` for this model.
+
+        The adjustments depend on whether multi-latent attention (MLA) is used,
+        whether the HF architecture name contains ``"Qwen"``, and whether
+        grouped GEMM is enabled. The method is idempotent: entries are removed
+        with a default, so running ``load()`` more than once does not raise
+        ``KeyError``.
+        """
+        if self._multi_latent_attention:
+            # Without MLA, input_layernorm.weight is mapped to
+            # linear_qkv.layer_norm_weight; with MLA the name needs no conversion.
+            self.REPLACE_DICT.pop("input_layernorm.weight", None)
+            # Add the mappings for the MLA norms.
+            self.REPLACE_DICT["self_attn.kv_a_layernorm"] = "self_attention.kv_layernorm"
+            self.REPLACE_DICT["self_attn.q_a_layernorm"] = "self_attention.q_layernorm"
+        else:
+            # Without MLA, q/k/v are fused into linear_qkv, so q_proj must not
+            # be treated as a standalone tensor-parallel-split weight.
+            self.split_mode_dict.pop("self_attn.q_proj", None)
+            self.REPLACE_DICT["self_attn.linear_qkv"] = "self_attention.linear_qkv"
+
+        # Qwen-family models: the post-attention norm weight is mapped onto
+        # linear_fc1's layer-norm weight, replacing the generic mapping.
+        if "Qwen" in self.hf_architecture:
+            self.REPLACE_DICT["post_attention_layernorm.weight"] = (
+                "mlp.linear_fc1.layer_norm_weight"
+            )
+            # Drop the generic mapping it replaces.
+            self.REPLACE_DICT.pop("post_attention_layernorm.", None)
+
+        if not self._moe_grouped_gemm:
+            self.REPLACE_DICT[".experts."] = ".experts.local_experts."
+
+    def get_global_idx(self, layer_name, pp_offset, ep_offset):
+        """Chain ``get_global_layer_idx`` and ``get_global_expert_idx`` over ``layer_name``.
+
+        NOTE(review): neither helper is defined on this class or on
+        ``HfMcoreManager`` in this file, so calling this method raises
+        ``AttributeError`` unless a subclass or mixin supplies them. Nothing in
+        this file calls it either -- confirm whether it is still needed.
+        """
+        layer_name = self.get_global_layer_idx(layer_name, pp_offset)
+        return self.get_global_expert_idx(layer_name, ep_offset)
+
+    def load_safe_tensor_file(self, file_name, params):
+        """Read the tensors named in ``params`` from one safetensors file on CPU.
+
+        Args:
+            file_name: File name relative to ``self.model_path``.
+            params: Container of parameter names to keep.
+
+        Returns:
+            dict mapping each key that is both in the file and in ``params`` to
+            its tensor.
+        """
+        shard_path = os.path.join(self.model_path, file_name)
+        with safe_open(shard_path, framework="pt", device="cpu") as shard:
+            return {name: shard.get_tensor(name) for name in shard.keys() if name in params}
+
+    def _slice_mp(self, t, dim):
+        """Return this tensor-parallel rank's equal share of ``t`` along ``dim``.
+
+        ``t`` is returned untouched when ``_tp_size`` is ``None`` or 1. Otherwise
+        the size along ``dim`` must be divisible by ``_tp_size`` and the
+        ``_tp_rank``-th piece is returned as a contiguous tensor.
+        """
+        world = self._tp_size
+        if world is None or world == 1:
+            return t
+        extent = t.shape[dim]
+        assert extent % world == 0
+        shards = torch.split(t, extent // world, dim=dim)
+        return shards[self._tp_rank].contiguous()
+
+    def _load_layer_tensor(self, layer_name):
+        """Load every HF tensor registered under ``layer_name``.
+
+        ``layer_name`` is a key of ``_layer_to_params``. The parameter names are
+        bucketed by shard file so that each file is opened once.
+
+        Returns:
+            dict mapping HF parameter name to tensor.
+        """
+        known = self._layer_to_params
+        assert layer_name in known, f"{layer_name} not in {known.keys()}"
+        wanted = known[layer_name]
+
+        # Bucket the parameter names by the shard file that stores them.
+        per_file = {}
+        for param_name in wanted:
+            per_file.setdefault(self._weight_mapping[param_name], set()).add(param_name)
+
+        loaded = {}
+        for shard_file, shard_params in per_file.items():
+            loaded.update(self.load_safe_tensor_file(shard_file, shard_params))
+        # Every requested parameter must have been found in its shard.
+        assert len(wanted) == len(loaded)
+        return loaded
+
+    def _handle_fuse_up_gate(self, gate_proj, up_proj, ffn_hidden_size, hidden_size):
+        """Fuse gate and up projections into one tensor-parallel-local weight.
+
+        gate and up are concatenated along dim 0 and viewed as
+        ``(-1, ffn_hidden_size, hidden_size)``. The ffn dimension is then
+        sliced for this rank, so the result holds this rank's gate rows
+        followed by this rank's up rows, flattened back to 2-D.
+        """
+        assert self._fuse_up_gate
+        stacked = torch.cat([gate_proj, up_proj], dim=0).view(-1, ffn_hidden_size, hidden_size)
+        local = self._slice_mp(stacked, dim=1)
+        return local.reshape(-1, hidden_size).contiguous()
+
+    def _handle_qkv(self, q, k, v):
+        """Interleave q/k/v per query group and slice for this TP rank.
+
+        Along dim 0, each query group contributes its q rows
+        (``heads_per_group * _kv_channel``), then its k rows and its v rows
+        (``_kv_channel`` each). The groups are concatenated in order and the
+        result is sliced along dim 0 with ``_slice_mp``.
+        """
+        heads_per_group = self._head_num // self._num_query_groups
+        q_chunks = torch.split(q, heads_per_group * self._kv_channel, dim=0)
+        k_chunks = torch.split(k, self._kv_channel, dim=0)
+        v_chunks = torch.split(v, self._kv_channel, dim=0)
+
+        pieces = []
+        for group in zip(q_chunks, k_chunks, v_chunks):
+            pieces.append(torch.cat(list(group), dim=0))
+        return self._slice_mp(torch.cat(pieces, dim=0), 0)
+
+    def pop_non_local_experts(self, tensors):
+        """Remove, in place, expert tensors that do not belong to this rank.
+
+        A key containing ``.experts.`` carries its expert index as the third
+        dot-separated field from the end. Keys whose index falls outside
+        ``[expert_begin, expert_end)`` are deleted from ``tensors``.
+        """
+
+        def is_remote(name):
+            if ".experts." not in name:
+                return False
+            expert_idx = int(name.split(".")[-3])
+            return not (self.expert_begin <= expert_idx < self.expert_end)
+
+        # Collect first: a dict must not change size while being iterated.
+        for name in [candidate for candidate in tensors if is_remote(candidate)]:
+            tensors.pop(name)
+
+    def load_embedding_or_lm_head(self, name):
+        """Load the embedding or lm_head weight, truncated to ``_padded_vocab_size`` rows.
+
+        Args:
+            name: Key of ``_layer_to_params`` to load (the callers in this
+                class pass ``"embedding"`` or ``"lm_head"``).
+
+        Returns:
+            The dict returned by ``_load_layer_tensor`` with the (last) tensor
+            replaced by its first ``_padded_vocab_size`` rows.
+
+        Raises:
+            AssertionError: If the checkpoint has fewer rows than
+                ``_padded_vocab_size``. This is raised explicitly so that the
+                check also runs under ``python -O``.
+        """
+        tensors = self._load_layer_tensor(name)
+        # The group is expected to hold exactly one tensor; if it ever holds
+        # more, the last one is used, as before.
+        hf_name, embedding = list(tensors.items())[-1]
+        vocab_size, _ = embedding.shape
+        if vocab_size < self._padded_vocab_size:
+            raise AssertionError(
+                f"ckpt.vocab_size={vocab_size}, padded_vocab_size={self._padded_vocab_size}"
+            )
+        tensors[hf_name] = embedding[: self._padded_vocab_size, :]
+        return tensors
+
+    def calculate_begin_and_end(self, stage):
+        """Compute this rank's decoder-layer range and routed-expert range.
+
+        Sets ``self.begin`` / ``self.end`` (half-open layer range of pipeline
+        stage ``stage``) and ``self.expert_begin`` / ``self.expert_end``
+        (half-open expert range of ``_ep_rank``).
+
+        Unset parallel settings are treated as "no parallelism": a ``None``
+        ``_pp_size`` / ``_ep_size`` counts as 1 and a ``None`` ``stage`` /
+        ``_ep_rank`` counts as 0, matching the constructor defaults.
+
+        Args:
+            stage: Pipeline stage index, or ``None`` for stage 0.
+
+        Raises:
+            AssertionError: If ``stage`` is out of range, or the layers or
+                experts cannot be distributed evenly. Such configurations
+                previously dropped layers silently or divided by zero.
+        """
+        # pp begin and end
+        pp_size = self._pp_size if self._pp_size is not None else 1
+        if stage is None:
+            stage = 0
+        assert stage < pp_size
+        if self._first_pipeline_num_layers is None and self._last_pipeline_num_layers is None:
+            assert self._num_layers % pp_size == 0, (
+                f"num_layers={self._num_layers} is not divisible by pp_size={pp_size}"
+            )
+            layer_per_stage = self._num_layers // pp_size
+            begin = layer_per_stage * stage
+            end = layer_per_stage * (stage + 1)
+        else:
+            # Stages whose size is pinned explicitly; the remaining "middle"
+            # stages share the remaining layers evenly.
+            pinned = [
+                n
+                for n in (self._first_pipeline_num_layers, self._last_pipeline_num_layers)
+                if n is not None
+            ]
+            middle_num_stages = pp_size - len(pinned)
+            middle_num_layers = self._num_layers - sum(pinned)
+            assert middle_num_stages >= 0, (
+                f"pp_size={pp_size} is too small for the configured first/last pipeline stages"
+            )
+            # No middle stage at all (e.g. pp_size == 2 with both ends pinned).
+            middle_per_stage = (
+                middle_num_layers // middle_num_stages if middle_num_stages > 0 else 0
+            )
+            assert middle_per_stage * middle_num_stages == middle_num_layers, (
+                f"{middle_num_layers} layers cannot be split evenly over "
+                f"{middle_num_stages} middle pipeline stages"
+            )
+
+            if self._first_pipeline_num_layers is None:
+                self._first_pipeline_num_layers = middle_per_stage
+            if self._last_pipeline_num_layers is None:
+                self._last_pipeline_num_layers = middle_per_stage
+
+            if stage == 0:
+                begin = 0
+                end = self._first_pipeline_num_layers
+            elif stage == pp_size - 1:
+                begin = self._num_layers - self._last_pipeline_num_layers
+                end = self._num_layers
+            else:
+                begin = self._first_pipeline_num_layers + (stage - 1) * middle_per_stage
+                end = begin + middle_per_stage
+        assert begin < end
+        self.begin = begin
+        self.end = end
+
+        # ep begin and end
+        if self._num_experts is not None:
+            ep_size = self._ep_size if self._ep_size is not None else 1
+            ep_rank = self._ep_rank if self._ep_rank is not None else 0
+            assert self._num_experts % ep_size == 0
+            num_local_expert = self._num_experts // ep_size
+            self.expert_begin = ep_rank * num_local_expert
+            self.expert_end = (ep_rank + 1) * num_local_expert
+        else:
+            self.expert_begin = None
+            self.expert_end = None
+
+    def process_tensor_operations(self, state_dict):
+        """Fuse and tensor-parallel-slice HF tensors; keys stay in HF naming.
+
+        Each tensor is handled by exactly one of these rules:
+
+        * ``q_proj`` (without MLA): fused with ``k_proj`` / ``v_proj`` and
+          stored under the ``.linear_qkv.`` name.
+        * ``up_proj``: fused with ``gate_proj`` and stored under the
+          ``.linear_fc1.`` name.
+        * anything else: sliced along the dimension given by
+          ``get_split_mode`` if a split rule matches, otherwise passed through.
+
+        Fused inputs no longer get an additional standalone sliced entry.
+        The standalone ``up_proj`` entry used to be renamed to the same mcore
+        key as the fused tensor, which only ended up correct because of dict
+        insertion order.
+
+        Args:
+            state_dict: HF name -> tensor. The inputs of fused tensors are
+                popped from it.
+
+        Returns:
+            New dict of HF-style name -> contiguous tensor.
+        """
+        tensors_adjusted = {}
+        # Snapshot the keys: fused inputs are popped from state_dict in the loop.
+        for hf_layer_name in list(state_dict.keys()):
+            # k/v/gate projections are consumed together with their q/up partner.
+            if ".k_proj." in hf_layer_name or ".v_proj." in hf_layer_name or ".gate_proj." in hf_layer_name:
+                continue
+
+            # qkv fuse (only the q_proj entry triggers it)
+            if not self._multi_latent_attention and self.is_qkv_fusion(hf_layer_name):
+                k_name = hf_layer_name.replace(self.Q_NAME, ".k_proj.")
+                v_name = hf_layer_name.replace(self.Q_NAME, ".v_proj.")
+                qkv_fused = self._handle_qkv(
+                    state_dict.pop(hf_layer_name), state_dict.pop(k_name), state_dict.pop(v_name)
+                )
+                mg_fuse_name = hf_layer_name.replace(self.Q_NAME, self.QKV_FUSE_NAME)
+                tensors_adjusted[mg_fuse_name] = qkv_fused.contiguous()
+
+            # gate up fuse (only the up_proj entry triggers it)
+            elif self.UP_NAME in hf_layer_name:
+                gate_name = hf_layer_name.replace(self.UP_NAME, ".gate_proj.")
+                # Dense MLP, shared experts and routed experts differ in width.
+                if "expert" not in gate_name:
+                    _ffn_hidden_size = self._ffn_hidden_size
+                elif "share" in gate_name:
+                    _ffn_hidden_size = self._moe_shared_expert_intermediate_size
+                else:
+                    _ffn_hidden_size = self._moe_ffn_hidden_size
+                gate_up_proj = self._handle_fuse_up_gate(
+                    state_dict.pop(gate_name),
+                    state_dict.pop(hf_layer_name),
+                    _ffn_hidden_size,
+                    self._hidden_size,
+                )
+                up_gate_out_name = hf_layer_name.replace(self.UP_NAME, self.GATE_UP_FUSE_NAME)
+                tensors_adjusted[up_gate_out_name] = gate_up_proj.contiguous()
+
+            # not fused: slice if a split rule matches, otherwise pass through
+            else:
+                tensor = state_dict[hf_layer_name]
+                is_split, split_dim = self.get_split_mode(hf_layer_name)
+                if is_split:
+                    tensor = self._slice_mp(tensor, split_dim)
+                tensors_adjusted[hf_layer_name] = tensor.contiguous()
+
+        return tensors_adjusted
+
+    def convert_layer_name(self, hf_name):
+        """Translate one HF parameter name into this rank's mcore name.
+
+        Steps, in order: shift the expert index by ``expert_begin``, apply the
+        HF -> mcore substitutions, shift the decoder-layer index by ``begin``,
+        and, with grouped GEMM, move the expert index behind ``weight``
+        (``.experts.<i>.<x>.weight`` becomes ``.experts.<x>.weight<i>``).
+        """
+
+        def to_local_expert(match):
+            return f".experts.{int(match.group(1)) - self.expert_begin}"
+
+        def to_local_layer(match):
+            prefix, layer_idx, rest = match.groups()
+            return f"{prefix}{int(layer_idx)-self.begin}{rest}"
+
+        # Expert index: global -> local to this expert-parallel rank.
+        if ".experts." in hf_name:
+            hf_name = re.sub(r"\.experts\.(\d+)", to_local_expert, hf_name)
+        # Base name conversion.
+        mg_name = self.hf_to_mcore_name(hf_name)
+        # Layer index: global -> local to this pipeline stage.
+        mg_name = re.sub(r"(decoder\.layers\.)(\d+)(\..*)", to_local_layer, mg_name)
+        # Grouped GEMM keeps the expert index as a suffix of the weight name.
+        if self._moe_grouped_gemm and ".experts." in mg_name:
+            mg_name = re.sub(r"\.experts\.(\d+)\.(.*?)\.weight$", r".experts.\2.weight\1", mg_name)
+        return mg_name
+
+    def refactor_tensors(self, state_dict):
+        """Turn a loaded HF state dict into this rank's mcore state dict.
+
+        Non-local experts are dropped from ``state_dict`` in place. The
+        remaining tensors are fused/split by ``process_tensor_operations`` and
+        every key of the resulting dict is renamed with ``convert_layer_name``.
+        """
+        self.pop_non_local_experts(state_dict)
+        # split and fuse
+        converted = self.process_tensor_operations(state_dict)
+
+        # Rename HF names to mcore names in place. The keys are snapshotted
+        # first because the dict is modified inside the loop.
+        for hf_key in tuple(converted.keys()):
+            mcore_key = self.convert_layer_name(hf_key)
+            converted[mcore_key] = converted.pop(hf_key)
+
+        return converted
+
+    def load_all_tensors(self):
+        """Load every HF tensor needed by this pipeline stage.
+
+        Decoder layers ``[begin, end)`` are always loaded. The first stage
+        also loads the embedding. The last stage also loads the final norm and
+        either ``lm_head`` (untied weights) or, with tied weights and more
+        than one pipeline stage, a copy of the embedding stored as
+        ``lm_head.weight``.
+
+        A ``None`` ``_pp_size`` is treated as 1, consistent with
+        ``calculate_begin_and_end``; it previously raised ``TypeError`` in the
+        tied-weights branch.
+
+        Returns:
+            dict mapping HF parameter name to tensor.
+        """
+        pp_size = self._pp_size if self._pp_size is not None else 1
+
+        state_dict = {}
+        for i in tqdm(range(self.begin, self.end)):
+            tmp = self._load_layer_tensor(i)
+            state_dict.update(tmp)
+
+        if self.begin == 0:
+            tmp = self.load_embedding_or_lm_head("embedding")
+            assert len(tmp) == 1
+            state_dict.update(tmp)
+
+        if self.end == self._num_layers:
+            tmp = self._load_layer_tensor("final_norm")
+            state_dict.update(tmp)
+            if self.untie_embeddings_and_output_weights:
+                tmp = self.load_embedding_or_lm_head("lm_head")
+                state_dict.update(tmp)
+            elif pp_size > 1:
+                # Tied weights: the last stage has no embedding of its own, so
+                # the embedding is loaded and exposed under the lm_head name.
+                tmp = self.load_embedding_or_lm_head("embedding")
+                assert len(tmp) == 1
+                state_dict.update(tmp)
+                state_dict["lm_head.weight"] = state_dict.pop("model.embed_tokens.weight")
+
+        return state_dict
+
+    def load(self):
+        """Adjust the name tables, pick this rank's ranges, then load and convert.
+
+        Returns:
+            The state dict produced by ``refactor_tensors``.
+        """
+        self.adjust_mapping_table()
+        self.calculate_begin_and_end(self._pp_rank)
+        return self.refactor_tensors(self.load_all_tensors())
+
+
diff --git a/verl/models/mcore/model_forward_fused.py b/verl/models/mcore/model_forward_fused.py
index 7c9189aada2..643994bb818 100644
--- a/verl/models/mcore/model_forward_fused.py
+++ b/verl/models/mcore/model_forward_fused.py
@@ -20,7 +20,10 @@
 import torch
 from megatron.core import parallel_state
 from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
-from megatron.core.inference.contexts import BaseInferenceContext
+try:
+    from megatron.core.inference.contexts import BaseInferenceContext
+except ImportError:  # module presumably absent in the targeted megatron-core; other errors should surface
+    BaseInferenceContext = None
 from megatron.core.models.gpt.gpt_model import GPTModel
 from megatron.core.packed_seq_params import PackedSeqParams
 from megatron.core.tensor_parallel.mappings import gather_from_sequence_parallel_region
diff --git a/verl/models/mcore/model_initializer.py b/verl/models/mcore/model_initializer.py
index 49a30bc9e2c..8964476d54c 100644
--- a/verl/models/mcore/model_initializer.py
+++ b/verl/models/mcore/model_initializer.py
@@ -18,7 +18,7 @@
 import inspect
 from abc import ABC, abstractmethod
 
-from megatron.core.models.gpt.gpt_layer_specs import get_gpt_decoder_block_spec, get_gpt_mtp_block_spec
+from megatron.core.models.gpt.gpt_layer_specs import get_gpt_decoder_block_spec
 from megatron.core.models.gpt.gpt_model import GPTModel
 
 from .config_converter import PretrainedConfig, TransformerConfig
@@ -190,9 +190,8 @@ def initialize(
         # MTP
         if self.tfconfig.mtp_num_layers is not None and self.tfconfig.mtp_num_layers > 0:
             transformer_layer_spec = self.get_transformer_layer_spec(vp_stage=vp_stage)
-            mtp_block_spec = get_gpt_mtp_block_spec(
-                self.tfconfig, transformer_layer_spec, use_transformer_engine=True, vp_stage=vp_stage
-            )
+            from megatron.core.models.gpt.gpt_layer_specs import get_gpt_mtp_block_spec
+            mtp_block_spec = get_gpt_mtp_block_spec(self.tfconfig, transformer_layer_spec, use_transformer_engine=True, vp_stage=vp_stage)
             kwargs["mtp_block_spec"] = mtp_block_spec
 
         model = super().initialize(**kwargs)
@@ -265,12 +264,59 @@ def initialize(
             parallel_output=True,
             language_share_embeddings_and_output_weights=share_embeddings_and_output_weights,
         )
-
         if post_process and value:
             from verl.models.llama.megatron.layers.parallel_linear import LinearForLastLayer
-
             qwen25_vl_model.language_model.output_layer = LinearForLastLayer(
                 input_size=tfconfig.hidden_size, output_size=1, config=tfconfig
             )
 
         return qwen25_vl_model
+
+class XdgMoEModel(BaseModelInitializer):
+    """Initializer that builds a ``GPTModel`` for XDG MoE models."""
+
+    def get_transformer_layer_spec(self):
+        """Return cybertron's DeepSeek-V2 transformer-engine layer spec for ``tfconfig``."""
+        from cybertron.models.deepseek_v2.layer_specs_deepseekv2 import get_gpt_layer_with_transformer_engine_spec
+
+        cfg = self.tfconfig
+        return get_gpt_layer_with_transformer_engine_spec(
+            cfg.num_moe_experts,
+            cfg.moe_grouped_gemm,
+            qk_layernorm=cfg.qk_layernorm,
+            multi_latent_attention=cfg.multi_latent_attention,
+        )
+
+    def initialize(
+        self,
+        pre_process: bool = True,
+        post_process: bool = True,
+        share_embeddings_and_output_weights: bool = False,
+        value: bool = False,
+        **extra_kwargs,
+    ):
+        """Build the model, optionally as a value model and with frozen routers.
+
+        Args:
+            pre_process: Forwarded to ``GPTModel``.
+            post_process: Forwarded to ``GPTModel``; together with ``value`` it
+                decides whether the output layer is replaced.
+            share_embeddings_and_output_weights: Forwarded to ``GPTModel``.
+            value: If true (and ``post_process``), the output layer is replaced
+                by a ``LinearForLastLayer`` with ``output_size=1``.
+            **extra_kwargs: Only ``freeze_moe_router`` (default ``True``) is read.
+
+        Returns:
+            The constructed ``GPTModel``.
+        """
+        freeze_moe_router = extra_kwargs.get("freeze_moe_router", True)
+        if freeze_moe_router:
+            # Set before the model is built so the routers are created with it.
+            self.tfconfig.moe_router_load_balancing_type = "none"
+
+        layer_spec = self.get_transformer_layer_spec()
+        model = GPTModel(
+            config=self.tfconfig,
+            transformer_layer_spec=layer_spec,
+            vocab_size=self.hf_config.vocab_size,
+            max_sequence_length=self.hf_config.max_position_embeddings,
+            pre_process=pre_process,
+            post_process=post_process,
+            share_embeddings_and_output_weights=share_embeddings_and_output_weights,
+            position_embedding_type="rope",
+            rotary_base=self.hf_config.rope_theta,
+        )
+
+        if post_process and value:
+            from verl.models.llama.megatron.layers.parallel_linear import LinearForLastLayer
+
+            model.output_layer = LinearForLastLayer(input_size=self.tfconfig.hidden_size, output_size=1, config=self.tfconfig)
+
+        if not freeze_moe_router:
+            return model
+
+        # Layers without a router attribute on their MLP are left untouched.
+        for layer in model.decoder.layers:
+            mlp = layer.mlp
+            if hasattr(mlp, "router"):
+                mlp.router.weight.requires_grad = False
+        return model
diff --git a/verl/models/mcore/qwen2_5_vl/attention.py b/verl/models/mcore/qwen2_5_vl/attention.py
index 91a27cc3edf..7ddacf8e7f3 100644
--- a/verl/models/mcore/qwen2_5_vl/attention.py
+++ b/verl/models/mcore/qwen2_5_vl/attention.py
@@ -14,6 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
+BaseInferenceContext = None
+PackedSeqParams = None
 from megatron.core.transformer.attention import *
 
 from .rope_utils import apply_rotary_pos_emb_absolute
diff --git a/verl/models/mcore/qwen2_5_vl/vision_transformer_block.py b/verl/models/mcore/qwen2_5_vl/vision_transformer_block.py
index 8f765a0ff63..e70e917b679 100644
--- a/verl/models/mcore/qwen2_5_vl/vision_transformer_block.py
+++ b/verl/models/mcore/qwen2_5_vl/vision_transformer_block.py
@@ -14,7 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+WrappedTensor = None
+BaseInferenceContext = None
 from megatron.core.transformer.transformer_block import *
 
 
diff --git a/verl/models/mcore/registry.py b/verl/models/mcore/registry.py
index 23f01e8b7aa..05f865f16e8 100644
--- a/verl/models/mcore/registry.py
+++ b/verl/models/mcore/registry.py
@@ -32,6 +32,7 @@
     hf_to_mcore_config_qwen2_5_vl,
     hf_to_mcore_config_qwen2moe,
     hf_to_mcore_config_qwen3moe,
+    hf_to_mcore_config_xdgmoe,
 )
 from .model_forward import (
     gptmodel_forward,
@@ -49,6 +50,7 @@
     Qwen2MoEModel,
     Qwen3MoEModel,
     Qwen25VLModel,
+    XdgMoEModel
 )
 from .weight_converter import (
     McoreToHFWeightConverterDense,
@@ -57,6 +59,7 @@
     McoreToHFWeightConverterQwen2_5_VL,
     McoreToHFWeightConverterQwen2Moe,
     McoreToHFWeightConverterQwen3Moe,
+    McoreToHFWeightConverterXdgMoE,
 )
 
 
@@ -70,6 +73,7 @@ class SupportedModel(Enum):
     LLAMA4 = "Llama4ForConditionalGeneration"  # not tested
     QWEN3 = "Qwen3ForCausalLM"  # tested
     QWEN3_MOE = "Qwen3MoeForCausalLM"  # not tested
+    XdgMoE = "XdgMoEForCausalLM"
 
 
 # Registry for model configuration converters
@@ -83,7 +87,8 @@ class SupportedModel(Enum):
     SupportedModel.LLAMA4: hf_to_mcore_config_llama4,
     SupportedModel.QWEN3: hf_to_mcore_config_dense,
     SupportedModel.QWEN3_MOE: hf_to_mcore_config_qwen3moe,
     SupportedModel.QWEN2_5_VL: hf_to_mcore_config_qwen2_5_vl,
+    SupportedModel.XdgMoE: hf_to_mcore_config_xdgmoe,
 }
 
 # Registry for model initializers
@@ -97,7 +101,8 @@ class SupportedModel(Enum):
     SupportedModel.LLAMA4: DenseModel,
     SupportedModel.QWEN3: DenseModel,
     SupportedModel.QWEN3_MOE: Qwen3MoEModel,
     SupportedModel.QWEN2_5_VL: Qwen25VLModel,
+    SupportedModel.XdgMoE: XdgMoEModel,
 }
 
 # Registry for model forward functions
@@ -113,6 +117,7 @@ class SupportedModel(Enum):
     SupportedModel.QWEN3_MOE: gptmodel_forward,
     SupportedModel.QWEN2_5_VL: gptmodel_forward_qwen2_5_vl,
     SupportedModel.DEEPSEEK_V3: gptmodel_forward,
+    SupportedModel.XdgMoE: gptmodel_forward,
 }
 
 # Registry for model forward functions
@@ -140,6 +145,7 @@ class SupportedModel(Enum):
     SupportedModel.QWEN3: McoreToHFWeightConverterDense,
     SupportedModel.QWEN3_MOE: McoreToHFWeightConverterQwen3Moe,
     SupportedModel.QWEN2_5_VL: McoreToHFWeightConverterQwen2_5_VL,
+    SupportedModel.XdgMoE: McoreToHFWeightConverterXdgMoE,
 }
 
 
diff --git a/verl/models/mcore/saver.py b/verl/models/mcore/saver.py
index 2a954b2417c..6d40bcf4e1f 100644
--- a/verl/models/mcore/saver.py
+++ b/verl/models/mcore/saver.py
@@ -115,6 +115,7 @@ def _get_gpt_model(model):
     if not isinstance(wrapped_models, list | tuple):
         wrapped_models = list(wrapped_models)
 
+    # TODO: support uneven pp
     assert len(wrapped_models) == virtual_pp_size
     num_layers_per_model = config.num_hidden_layers // pp_size // virtual_pp_size
     assert num_layers_per_model * pp_size * virtual_pp_size == config.num_hidden_layers
@@ -383,12 +384,12 @@ def _broadcast_tp_shard_tensor_qkv(tensor, q_name, k_name, v_name, src_pp_rank):
             if gpt_model_module.config.qk_layernorm:
                 _broadcast_tensor(
                     sync_layer.self_attention.q_layernorm.weight,
-                    f"{layer_name}.self_attn.q_norm.weight",
+                    f"{layer_name}.self_attn.q_layernorm.weight",
                     src_pp_rank=src_pp_rank,
                 )
                 _broadcast_tensor(
                     sync_layer.self_attention.k_layernorm.weight,
-                    f"{layer_name}.self_attn.k_norm.weight",
+                    f"{layer_name}.self_attn.k_layernorm.weight",
                     src_pp_rank=src_pp_rank,
                 )
 
@@ -417,7 +418,7 @@ def _broadcast_tp_shard_tensor_qkv(tensor, q_name, k_name, v_name, src_pp_rank):
             )
 
             _broadcast_tensor(
-                sync_layer.mlp.linear_fc1.layer_norm_weight,
+                sync_layer.pre_mlp_layernorm.weight,
                 f"{layer_name}.post_attention_layernorm.weight",
                 src_pp_rank=src_pp_rank,
             )
@@ -495,3 +496,5 @@ def merge_megatron_ckpt_gptmodel_mixtral(
     wrapped_models, config, dtype, is_value_model=False, tie_word_embeddings=False
 ):
     raise NotImplementedError("merge_megatron_ckpt_gptmodel_mixtral is not implemented")
+
+
diff --git a/verl/models/mcore/weight_converter.py b/verl/models/mcore/weight_converter.py
index 791513f32d1..16b82730bf1 100644
--- a/verl/models/mcore/weight_converter.py
+++ b/verl/models/mcore/weight_converter.py
@@ -477,3 +477,122 @@ def _convert_mlp_param(self, name: str, params: list[torch.Tensor]) -> tuple[lis
         else:
             raise NotImplementedError(f"Unsupported parameter name: {name}")
         return convert_names, params
+    
+class McoreToHFWeightConverterXdgMoE(McoreToHFWeightConverterBase):
+    """Translate Megatron-core XdgMoE parameter names into their HuggingFace names."""
+
+    def _convert_attention_param(self, name: str, params: list[torch.Tensor]) -> tuple[list[str], list[torch.Tensor]]:
+        """Map one attention-side parameter to its HF name(s); ``params`` is returned unchanged.
+
+        Handles ``self_attention.linear_qkv.{weight,bias}`` (-> q/k/v_proj), ``linear_proj.weight``
+        (-> o_proj), ``linear_qkv.layer_norm_weight`` and ``input_layernorm.weight`` (-> input_layernorm),
+        and ``{q,k}_layernorm.weight`` (-> q_norm / k_norm). Other names raise NotImplementedError.
+        """
+        layer_number = name.split(".")[2]
+        convert_names = []
+        if "self_attention.linear_qkv.bias" in name or "self_attention.linear_qkv.weight" in name:
+            param_type = name.split(".")[-1]
+            assert param_type in ("bias", "weight")
+            convert_names.append(f"model.layers.{layer_number}.self_attn.q_proj.{param_type}")
+            convert_names.append(f"model.layers.{layer_number}.self_attn.k_proj.{param_type}")
+            convert_names.append(f"model.layers.{layer_number}.self_attn.v_proj.{param_type}")
+            # The fused QKV tensor must already have been split into q/k/v before it gets here.
+            assert len(params) == 3, f"Expected 3 params, got {len(params)} for {name}, {layer_number}, {param_type}"
+        elif "self_attention.linear_proj.weight" in name:
+            convert_names.append(f"model.layers.{layer_number}.self_attn.o_proj.weight")
+            assert len(params) == 1
+        elif "self_attention.linear_qkv.layer_norm_weight" in name:
+            convert_names.append(f"model.layers.{layer_number}.input_layernorm.weight")
+            assert len(params) == 1
+        elif "self_attention.q_layernorm.weight" in name:
+            convert_names.append(f"model.layers.{layer_number}.self_attn.q_norm.weight")
+            assert len(params) == 1
+        elif "self_attention.k_layernorm.weight" in name:
+            convert_names.append(f"model.layers.{layer_number}.self_attn.k_norm.weight")
+            assert len(params) == 1
+        elif "input_layernorm.weight" in name:
+            convert_names.append(f"model.layers.{layer_number}.input_layernorm.weight")
+        else:
+            raise NotImplementedError(f"Unsupported parameter name: {name}")
+        return convert_names, params
+
+    def _convert_mlp_param(self, name: str, params: list[torch.Tensor]) -> tuple[list[str], list[torch.Tensor]]:
+        """Map one MLP / MoE parameter to its HF name(s); ``params`` is returned unchanged.
+
+        Megatron-core suffix (after ``decoder.layers.N.``) -> HF suffix (after ``model.layers.N.``):
+
+        dense / shared experts:
+            mlp.linear_fc1.layer_norm_weight     -> post_attention_layernorm.weight
+            mlp.linear_fc1.weight                -> mlp.gate_proj.weight + mlp.up_proj.weight
+            mlp.linear_fc2.weight                -> mlp.down_proj.weight
+            mlp.shared_experts.linear_fc1.weight -> mlp.shared_experts.{gate,up}_proj.weight
+            mlp.shared_experts.linear_fc2.weight -> mlp.shared_experts.down_proj.weight
+        MoE:
+            pre_mlp_layernorm.weight             -> post_attention_layernorm.weight
+            mlp.router.weight                    -> mlp.gate.weight
+            mlp.router.expert_bias               -> mlp.gate.e_score_correction_bias
+            mlp.experts.linear_fc1.weight<E>     -> mlp.experts.<E>.{gate,up}_proj.weight
+            mlp.experts.linear_fc2.weight<E>     -> mlp.experts.<E>.down_proj.weight
+
+        Entries that map to two HF names expect ``params`` to hold two tensors, gate first and
+        then up, so that they line up with the returned names.
+
+        Raises NotImplementedError for any other name.
+        """
+        name_map_after_layer = {
+            "mlp.linear_fc1.layer_norm_weight": "post_attention_layernorm.weight",
+            "mlp.linear_fc2.weight": "mlp.down_proj.weight",
+            "mlp.shared_experts.linear_fc2.weight": "mlp.shared_experts.down_proj.weight",
+            "mlp.linear_fc1.weight": ["mlp.gate_proj.weight", "mlp.up_proj.weight"],
+            "mlp.shared_experts.linear_fc1.weight": ["mlp.shared_experts.gate_proj.weight", "mlp.shared_experts.up_proj.weight"],
+            "pre_mlp_layernorm.weight": "post_attention_layernorm.weight",
+            "mlp.router.weight": "mlp.gate.weight",
+            "mlp.router.expert_bias": "mlp.gate.e_score_correction_bias",  # Megatron-core name of the router bias
+            "mlp.router.e_score_correction_bias": "mlp.gate.e_score_correction_bias",  # kept for compatibility
+        }
+        convert_names = []
+        layer_number = name.split(".")[2]
+        # maxsplit=1: keep everything after the first ".<layer>." even if the suffix repeats it.
+        name_after_layer = name.split(f".{layer_number}.", 1)[1]
+        if name_after_layer in name_map_after_layer:
+            mapped_name = name_map_after_layer[name_after_layer]
+            if isinstance(mapped_name, list):
+                assert len(params) == len(mapped_name)
+                convert_names.extend(f"model.layers.{layer_number}.{one}" for one in mapped_name)
+            else:
+                assert len(params) == 1
+                convert_names.append(f"model.layers.{layer_number}.{mapped_name}")
+        elif "mlp.experts.linear_fc1.weight" in name:
+            # Expert weights carry the expert index as a suffix, e.g. "...linear_fc1.weight0".
+            expert_id = name.split("weight")[-1]
+            convert_names.append(f"model.layers.{layer_number}.mlp.experts.{expert_id}.gate_proj.weight")
+            convert_names.append(f"model.layers.{layer_number}.mlp.experts.{expert_id}.up_proj.weight")
+            assert len(params) == 2
+        elif "mlp.experts.linear_fc2.weight" in name:
+            expert_id = name.split("weight")[-1]
+            convert_names.append(f"model.layers.{layer_number}.mlp.experts.{expert_id}.down_proj.weight")
+            assert len(params) == 1
+        else:
+            raise NotImplementedError(f"Unsupported parameter name: {name}")
+
+        return convert_names, params
+
+    def convert_param(self, name: str, params_one_group: list[torch.Tensor]) -> tuple[list[str], list[torch.Tensor]]:
+        """Convert one Megatron-core parameter group to ``(hf_names, tensors)``.
+
+        Embedding, final norm and output layer are renamed directly (first tensor only); per-layer
+        names go to the attention or MLP helper. Unknown names raise NotImplementedError.
+        """
+        direct_name_mapping = {
+            "embedding.word_embeddings.weight": "model.embed_tokens.weight",
+            "decoder.final_layernorm.weight": "model.norm.weight",
+            "output_layer.weight": "lm_head.weight",
+        }
+        if name in direct_name_mapping:
+            return [direct_name_mapping[name]], [params_one_group[0]]
+        if "self_attention" in name or "input_layernorm.weight" in name:
+            return self._convert_attention_param(name, params_one_group)
+        if "mlp" in name:
+            # Also matches "pre_mlp_layernorm", which _convert_mlp_param maps.
+            return self._convert_mlp_param(name, params_one_group)
+        raise NotImplementedError(f"Unsupported parameter name: {name}")
\ No newline at end of file
diff --git a/verl/models/transformers/monkey_patch.py b/verl/models/transformers/monkey_patch.py
index 7f0e10ab65e..0b1ac3c042d 100644
--- a/verl/models/transformers/monkey_patch.py
+++ b/verl/models/transformers/monkey_patch.py
@@ -297,7 +297,23 @@ def state_dict(self, *args, **kwargs):
             print("Not support fused kernels for KimiVL")
 
         return
+    elif model.config.model_type == "xdgmoe":
+        module = sys.modules[model.__class__.__module__]
+        real_cls = getattr(module, "XdgMoEFlashAttention2")
 
+        if use_remove_padding or ulysses_sp_size > 1:
+            from verl.models.transformers.xdgmoe import ulysses_flash_attn_forward
+
+            real_cls.forward = ulysses_flash_attn_forward
+            print("Monkey patch FlashAttention2.forward in XdgMoE")
+
+        if use_fused_kernels:
+            # TODO
+            pass 
+            # from verl.models.transformers.qwen2_vl import forward_for_ppo
+            # XdgMoEForCausalLM.forward = forward_for_ppo
+
+        return 
     # transformers<=4.47.1
     if use_remove_padding or ulysses_sp_size > 1:
         if hasattr(module, "_flash_attention_forward"):
diff --git a/verl/models/transformers/xdgmoe.py b/verl/models/transformers/xdgmoe.py
new file mode 100644
index 00000000000..4b339b89b0d
--- /dev/null
+++ b/verl/models/transformers/xdgmoe.py
@@ -0,0 +1,126 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+import os
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+from transformers.cache_utils import Cache, DynamicCache
+import torch
+from transformers.modeling_flash_attention_utils import _flash_attention_forward
+
+from transformers.utils import is_flash_attn_greater_or_equal
+
+from verl.utils.ulysses import (
+    gather_heads_scatter_seq,
+    gather_position_ids_seq_dim,
+    gather_seq_scatter_heads,
+    get_ulysses_sequence_parallel_world_size,
+    validate_ulysses_config,
+)
+
+try:
+    from transformers.modeling_flash_attention_utils import flash_attn_func, flash_attn_varlen_func
+
+    _flash_supports_window_size = "window_size" in list(inspect.signature(flash_attn_func).parameters)
+except ImportError:
+    # flash-attn is optional: bind every name the try-branch would have defined.
+    flash_attn_func = flash_attn_varlen_func = None
+    _flash_supports_window_size = False
+
+
+def ulysses_flash_attn_forward(
+    self,
+    hidden_states: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_value: Optional[Cache] = None,
+    output_attentions: bool = False,
+    **kwargs,
+) -> Tuple[torch.Tensor, None, Optional[Cache]]:
+    """Forward pass patched onto ``XdgMoEFlashAttention2`` to support Ulysses sequence parallelism.
+
+    With Ulysses world size > 1, ``hidden_states`` is this rank's sequence shard: Q/K/V are regrouped
+    to (full sequence, heads / sp_size) before attention and the output is regrouped back afterwards.
+
+    Returns ``(attn_output, None, past_key_value)``. Attention weights are always ``None`` (this path
+    never computes them) and ``past_key_value`` is passed through without being updated here.
+    """
+    from moe_trainer.modeling_xdgmoe import apply_rotary_pos_emb, repeat_kv
+
+    bsz, q_len, _ = hidden_states.size()  # q_len = seq_length / sp_size
+    query_states = self.q_proj(hidden_states)  # (batch_size, seq_length / sp_size, num_heads * head_size)
+    key_states = self.k_proj(hidden_states)
+    value_states = self.v_proj(hidden_states)
+
+    query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+    key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+    value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+
+    if self.qk_layernorm:
+        query_states = self.q_layernorm(query_states)
+        key_states = self.k_layernorm(key_states)
+
+    ulysses_sp_size = get_ulysses_sequence_parallel_world_size()
+    if ulysses_sp_size > 1:
+        validate_ulysses_config(self.num_heads, ulysses_sp_size)
+
+        key_states = repeat_kv(key_states, self.num_key_value_groups)
+        value_states = repeat_kv(value_states, self.num_key_value_groups)
+        query_states = gather_seq_scatter_heads(query_states, seq_dim=2, head_dim=1)
+        key_states = gather_seq_scatter_heads(key_states, seq_dim=2, head_dim=1)
+        value_states = gather_seq_scatter_heads(value_states, seq_dim=2, head_dim=1)
+        # (batch_size, num_head / sp_size, seq_length, head_size)
+        position_ids = gather_position_ids_seq_dim(position_ids)
+    # Sequence length seen by attention: the gathered length when sp_size > 1, otherwise q_len.
+    full_q_len = query_states.size(2)
+
+    kv_seq_len = key_states.shape[-2]
+    if past_key_value is not None:
+        kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+    cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+    query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+    dropout_rate = 0.0 if not self.training else self.attention_dropout
+
+    # Reshape to the (batch, seq, heads, head_dim) layout expected by Flash Attention
+    query_states = query_states.transpose(1, 2)
+    key_states = key_states.transpose(1, 2)
+    value_states = value_states.transpose(1, 2)
+
+    input_dtype = query_states.dtype
+    if input_dtype == torch.float32:
+        # Handle the case where the model is quantized
+        if hasattr(self.config, "_pre_quantization_dtype"):
+            target_dtype = self.config._pre_quantization_dtype
+        elif torch.is_autocast_enabled():
+            target_dtype = torch.get_autocast_gpu_dtype()
+        else:
+            target_dtype = self.q_proj.weight.dtype
+
+        query_states = query_states.to(target_dtype)
+        key_states = key_states.to(target_dtype)
+        value_states = value_states.to(target_dtype)
+
+    # Pass the gathered length: q_len is only this rank's shard when sp_size > 1.
+    attn_output = self._flash_attention_forward(
+        query_states, key_states, value_states, attention_mask, full_q_len, dropout=dropout_rate
+    )
+
+    if ulysses_sp_size > 1:
+        attn_output = gather_heads_scatter_seq(attn_output, head_dim=2, seq_dim=1)
+
+    attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
+    attn_output = self.o_proj(attn_output)
+
+    return attn_output, None, past_key_value
diff --git a/verl/models/weight_loader_registry.py b/verl/models/weight_loader_registry.py
index 8aa3bc71f84..1be18f5120e 100644
--- a/verl/models/weight_loader_registry.py
+++ b/verl/models/weight_loader_registry.py
@@ -47,6 +47,7 @@ def get_weight_saver(arch: str):
         "DeepseekV3ForCausalLM": merge_megatron_ckpt_gptmodel_dpskv3,
         "Qwen3ForCausalLM": merge_megatron_ckpt_gptmodel,
         "Qwen3MoeForCausalLM": merge_megatron_ckpt_gptmodel_qwen_moe,
+        "XdgMoEForCausalLM": merge_megatron_ckpt_gptmodel,
     }
     if arch in _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY:
         return _MODEL_WEIGHT_MEGATRON_SAVER_REGISTRY[arch]
diff --git a/verl/single_controller/base/decorator.py b/verl/single_controller/base/decorator.py
index cbd27de4965..5d7f2b57cb5 100644
--- a/verl/single_controller/base/decorator.py
+++ b/verl/single_controller/base/decorator.py
@@ -192,7 +192,6 @@ def dispatch_dp_compute_data_proto_with_func(worker_group, *args, **kwargs):
     splitted_args_with_func = [[args[0]] * worker_group.world_size] + splitted_args
     return splitted_args_with_func, splitted_kwargs
 
-
 def collect_dp_compute_data_proto(worker_group, output):
     import ray
 
diff --git a/verl/single_controller/base/megatron/worker.py b/verl/single_controller/base/megatron/worker.py
new file mode 100644
index 00000000000..57b3bfca908
--- /dev/null
+++ b/verl/single_controller/base/megatron/worker.py
@@ -0,0 +1,108 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from verl.single_controller.base.worker import DistGlobalInfo, DistRankInfo, Worker
+
+
+class MegatronWorker(Worker):
+    """Worker base class adding Megatron parallel-state queries and HF -> Megatron config setup."""
+
+    def __init__(self, cuda_visible_devices=None) -> None:
+        super().__init__(cuda_visible_devices)
+
+    def get_megatron_global_info(self):
+        """Return the TP/DP/PP/CP world sizes reported by Megatron's parallel state."""
+        from megatron.core import parallel_state as mpu
+        tp_size = mpu.get_tensor_model_parallel_world_size()
+        dp_size = mpu.get_data_parallel_world_size()
+        pp_size = mpu.get_pipeline_model_parallel_world_size()
+        cp_size = mpu.get_context_parallel_world_size()
+        return DistGlobalInfo(tp_size=tp_size, dp_size=dp_size, pp_size=pp_size, cp_size=cp_size)
+
+    def get_megatron_rank_info(self):
+        """Return this process's TP/DP/PP/CP ranks reported by Megatron's parallel state."""
+        from megatron.core import parallel_state as mpu
+        tp_rank = mpu.get_tensor_model_parallel_rank()
+        dp_rank = mpu.get_data_parallel_rank()
+        pp_rank = mpu.get_pipeline_model_parallel_rank()
+        cp_rank = mpu.get_context_parallel_rank()
+        return DistRankInfo(tp_rank=tp_rank, dp_rank=dp_rank, pp_rank=pp_rank, cp_rank=cp_rank)
+
+    def _init_hf_config_and_tf_config(
+        self,
+        model_path,
+        tokenizer_or_path,
+        dtype,
+        override_model_config,
+        override_transformer_config,
+        trust_remote_code=False,
+    ):
+        """Load the tokenizer and HF config, then build the Megatron transformer config.
+
+        Sets self.local_path, tokenizer, architectures, share_embeddings_and_output_weights, hf_config, tf_config.
+        """
+        from transformers import AutoConfig
+
+        from verl.models.mcore import hf_to_mcore_config
+        from verl.utils import hf_tokenizer
+        from verl.utils.fs import copy_to_local
+        from verl.utils.model import update_model_config
+
+        # Step 1: initialize the tokenizer
+        self.local_path = copy_to_local(model_path)
+        if tokenizer_or_path is None:
+            self.tokenizer = hf_tokenizer(self.local_path, trust_remote_code=trust_remote_code)
+        elif isinstance(tokenizer_or_path, str):
+            self.tokenizer = hf_tokenizer(copy_to_local(tokenizer_or_path), trust_remote_code=trust_remote_code)
+        else:
+            self.tokenizer = tokenizer_or_path
+
+        # Step 2: get the hf config; retry failures, but give up (re-raising) instead of looping forever
+        import time
+        max_attempts = 60
+        for attempt in range(1, max_attempts + 1):
+            try:
+                hf_config = AutoConfig.from_pretrained(self.local_path, trust_remote_code=trust_remote_code)
+                break
+            except Exception as e:
+                print(f"Error loading config: {e}")
+                if attempt == max_attempts:
+                    raise
+                time.sleep(1)
+
+        # Step 3: override the hf config
+        override_config_kwargs = {
+            "bos_token_id": self.tokenizer.bos_token_id,
+            "eos_token_id": self.tokenizer.eos_token_id,
+            "pad_token_id": self.tokenizer.pad_token_id,
+            **override_model_config.get("model_config", {}),  # user overrides win over tokenizer ids
+        }
+        self.share_embeddings_and_output_weights = getattr(hf_config, "tie_word_embeddings", False)
+        update_model_config(hf_config, override_config_kwargs=override_config_kwargs)
+        self.architectures = getattr(hf_config, "architectures", None)
+        if self.rank == 0:
+            print(f"Model config after override: {hf_config}")
+        tf_config = hf_to_mcore_config(hf_config, dtype, **override_transformer_config)
+
+        # add optimization config to tf_config, e.g. checkpointing
+        if self.config.model.get("enable_gradient_checkpointing", False):
+            gradient_checkpointing_cfg = dict(self.config.model.get("gradient_checkpointing_kwargs", dict()))
+            tf_config.recompute_method = gradient_checkpointing_cfg.get("activations_checkpoint_method", "full")
+            tf_config.recompute_granularity = gradient_checkpointing_cfg.get("activations_checkpoint_granularity", "full")
+            tf_config.recompute_num_layers = gradient_checkpointing_cfg.get("activations_checkpoint_num_layers", -1)
+        extra = (self.config.get("megatron", {}) or {}).get("extra", {}) or {}
+        for k, v in extra.items():
+            setattr(tf_config, k, v)
+        print(f"TF config: {tf_config}")
+        self.hf_config, self.tf_config = hf_config, tf_config
diff --git a/verl/single_controller/ray/base.py b/verl/single_controller/ray/base.py
index 51260ed6dd1..cac3655b607 100644
--- a/verl/single_controller/ray/base.py
+++ b/verl/single_controller/ray/base.py
@@ -15,6 +15,7 @@
 import logging
 import socket
 from copy import deepcopy
+import os
 from typing import Any, Optional
 
 import ray
@@ -553,12 +554,25 @@ def _execute_remote_single_worker(self, worker, method_name: str, *args, **kwarg
         Returns:
             Remote object reference to the method execution
         """
-        if self.fused_worker_used and method_name not in self.method_names:
+        if self.fused_worker_used:
+            # Prefer direct call if exposed on fused worker; otherwise use fused dispatch
+            if hasattr(worker, method_name):
+                remote_call = getattr(worker, method_name)
+                return remote_call.remote(*args, **kwargs)
             remote_call = getattr(worker, self.fused_worker_execute_fn_name)
             return remote_call.remote(f"{self.sub_cls_name}_fwmn_{method_name}", *args, **kwargs)
         # fused worker not used
-        remote_call = getattr(worker, method_name)
-        return remote_call.remote(*args, **kwargs)
+        if hasattr(worker, method_name):
+            remote_call = getattr(worker, method_name)
+            return remote_call.remote(*args, **kwargs)
+        # Try role-prefixed name as bound in WorkerDict (e.g., 'rollout_generate_sequences')
+        for prefix in ("rollout", "actor", "critic", "reward"):
+            cand = f"{prefix}_{method_name}"
+            if hasattr(worker, cand):
+                remote_call = getattr(worker, cand)
+                return remote_call.remote(*args, **kwargs)
+        # No matching method found; raise explicit error
+        raise AttributeError(f"ActorHandle has no method '{method_name}' or any of {[p+'_'+method_name for p in ('rollout','actor','critic','reward')]}.")
 
     def execute_rank_zero_sync(self, method_name: str, *args, **kwargs):
         """Execute a method on rank zero worker synchronously.
@@ -655,6 +669,28 @@ def execute_all_async(self, method_name: str, *args, **kwargs):
 
         return [self._execute_remote_single_worker(worker, method_name, *args, **kwargs) for worker in self._workers]
 
+    def execute_all_stream(self, method_name: str, *args, **kwargs):
+        """Run ``method_name`` on all workers and yield ``(worker_index, result)`` as each call finishes.
+
+        Pairs are yielded in completion order; ``worker_index`` is the position in ``self._workers``.
+        If a call raised, the exception instance is yielded in place of its result.
+        """
+        pending_refs = list(self.execute_all_async(method_name, *args, **kwargs))
+        # Parallel to pending_refs: the original worker index of each still-pending ref.
+        worker_indices = list(range(len(pending_refs)))
+        while pending_refs:
+            done, _ = ray.wait(pending_refs, num_returns=1, timeout=None)
+            if not done:
+                break
+            pos = pending_refs.index(done[0])
+            done_ref = pending_refs.pop(pos)
+            worker_index = worker_indices.pop(pos)
+            try:
+                result = ray.get(done_ref)
+            except Exception as e:
+                result = e
+            yield worker_index, result
+
     @property
     def master_address(self):
         return self._master_addr
diff --git a/verl/trainer/config/rollout/rollout.yaml b/verl/trainer/config/rollout/rollout.yaml
index ba6261ed63a..d9605853d0c 100644
--- a/verl/trainer/config/rollout/rollout.yaml
+++ b/verl/trainer/config/rollout/rollout.yaml
@@ -140,6 +140,9 @@ multi_turn:
   # null for no limit (default max_length // 3)
   max_user_turns: null
 
+  max_turns: null
+  enable_tokenization_sanity_check: True
+
   # max parallel call for tools in single turn
   max_parallel_calls: 1
 
diff --git a/verl/trainer/main_ppo.py b/verl/trainer/main_ppo.py
index f7d8825b57d..05ecfaf9a43 100644
--- a/verl/trainer/main_ppo.py
+++ b/verl/trainer/main_ppo.py
@@ -21,10 +21,12 @@
 import hydra
 import ray
 from omegaconf import OmegaConf
+import time
 
 from verl.experimental.dataset.sampler import AbstractSampler
 from verl.trainer.constants_ppo import get_ppo_ray_runtime_env
 from verl.trainer.ppo.ray_trainer import RayPPOTrainer
+from verl.trainer.ppo.ray_async_pipeline_trainer import RayPPOAsyncPipelineTrainer
 from verl.trainer.ppo.reward import load_reward_manager
 from verl.utils.device import is_cuda_available
 from verl.utils.import_utils import load_extern_type
@@ -56,7 +58,7 @@ def run_ppo(config) -> None:
         # NCCL debug level, VLLM logging level, and allow runtime LoRA updating
         # `num_cpus` specifies the number of CPU cores Ray can use, obtained from the configuration
         default_runtime_env = get_ppo_ray_runtime_env()
-        ray_init_kwargs = config.ray_kwargs.get("ray_init", {})
+        ray_init_kwargs = config.ray_kwargs.get("ray_init", {}) if hasattr(config, "ray_kwargs") else {}
         runtime_env_kwargs = ray_init_kwargs.get("runtime_env", {})
         runtime_env = OmegaConf.merge(default_runtime_env, runtime_env_kwargs)
         ray_init_kwargs = OmegaConf.create({**ray_init_kwargs, "runtime_env": runtime_env})
@@ -67,6 +69,7 @@ def run_ppo(config) -> None:
     # Execute the `run` method of the TaskRunner instance remotely and wait for it to complete
     if (
         is_cuda_available
+        and hasattr(config, "global_profiler")
         and config.global_profiler.tool == "nsys"
         and config.global_profiler.get("steps") is not None
         and len(config.global_profiler.get("steps", [])) > 0
@@ -134,7 +137,15 @@ def add_actor_rollout_worker(self, config):
 
         from verl.trainer.ppo.ray_trainer import Role
 
-        self.role_worker_mapping[Role.ActorRollout] = ray.remote(actor_rollout_cls)
+        if config.trainer.get("async_pipeline", False):
+            self.role_worker_mapping.update({
+                Role.Actor: ray.remote(actor_rollout_cls),
+                Role.RefPolicy: ray.remote(actor_rollout_cls),
+                Role.Rollout: ray.remote(actor_rollout_cls),
+                # Role.Critic: ray.remote(CriticWorker),
+            })
+        else:
+            self.role_worker_mapping[Role.ActorRollout] = ray.remote(actor_rollout_cls)
 
         return actor_rollout_cls, ray_worker_group_cls
 
@@ -165,12 +176,59 @@ def init_resource_pool_mgr(self, config):
         """Initialize resource pool manager."""
         from verl.trainer.ppo.ray_trainer import Role
 
-        global_pool_id = "global_pool"
-        resource_pool_spec = {
-            global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes,
-        }
-        self.mapping[Role.ActorRollout] = global_pool_id
-        self.mapping[Role.Critic] = global_pool_id
+        if config.trainer.get("async_pipeline", False):
+            global_pool_id = "global_pool"
+            actor_pool_id = "actor_pool"
+            ref_pool_id = "ref_pool"
+            rollout_pool_id = "rollout_pool"
+            # 2x8 
+            # 0.25 0.25 0.5 -> 4+4+8
+            # [4], [4], [8]
+            # trainer.use_nodes_ratios=[0.5,0.5,0.5,0.5] 
+            # means: train/logp/ref_logp use 0.5 ngpus, generate use 0.5 ngpus
+            total_use_nodes_ratio = config.trainer.use_nodes_ratios
+            total_ngpus = config.trainer.n_gpus_per_node * config.trainer.nnodes
+            actor_use_nodes_ratio, logp_use_nodes_ratio, ref_use_nodes_ratio, rollout_use_nodes_ratio = total_use_nodes_ratio
+            actor_pool_size = int(actor_use_nodes_ratio * total_ngpus)
+            ref_pool_size = int(ref_use_nodes_ratio * total_ngpus)
+            rollout_pool_size = int(rollout_use_nodes_ratio * total_ngpus)
+            def gen_pool_spec(pool_size):
+                """Return the pool spec (list of GPU counts, one entry per node) for ``pool_size`` GPUs."""
+                nper_node = config.trainer.n_gpus_per_node
+                pool_nodes = pool_size // nper_node
+                if pool_nodes > 0 and pool_size != nper_node * pool_nodes:
+                    msg = f"Pool size {pool_size} must be a multiple of n_gpus_per_node {nper_node}"
+                    raise ValueError(f"{msg}, or this setting will get poor performance.")
+                return [nper_node] * pool_nodes if pool_nodes > 1 else [pool_size]
+        
+            # TODO: check hybrid actor/ref
+            hybrid_actor_ref = actor_pool_size == ref_pool_size
+            
+            resource_pool_spec = {
+                # TODO: node size;
+                actor_pool_id: gen_pool_spec(actor_pool_size),
+                rollout_pool_id: gen_pool_spec(rollout_pool_size),
+            }
+            if hybrid_actor_ref:
+                ref_pool_id = actor_pool_id
+            else:
+                resource_pool_spec[ref_pool_id] = gen_pool_spec(ref_pool_size)
+
+            self.mapping = {
+                Role.Actor: actor_pool_id,
+                Role.RefPolicy: ref_pool_id,
+                Role.Rollout: rollout_pool_id,
+                Role.Critic: actor_pool_id,
+            }
+
+        else:
+            global_pool_id = "global_pool"
+            resource_pool_spec = {
+                global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes,
+            }
+            self.mapping[Role.ActorRollout] = global_pool_id
+            self.mapping[Role.Critic] = global_pool_id
+
         from verl.trainer.ppo.ray_trainer import ResourcePoolManager
 
         resource_pool_manager = ResourcePoolManager(resource_pool_spec=resource_pool_spec, mapping=self.mapping)
@@ -263,9 +321,13 @@ def run(self, config):
         train_dataset = create_rl_dataset(config.data.train_files, config.data, tokenizer, processor, is_train=True)
         val_dataset = create_rl_dataset(config.data.val_files, config.data, tokenizer, processor, is_train=False)
         train_sampler = create_rl_sampler(config.data, train_dataset)
-
-        # Initialize the PPO trainer.
-        trainer = RayPPOTrainer(
+        
+        ppo_trainer_class = RayPPOAsyncPipelineTrainer if config.trainer.get("async_pipeline", False) else RayPPOTrainer
+        if hasattr(ppo_trainer_class, '__name__'):
+            print(f"Using PPO trainer class: {ppo_trainer_class.__name__}")
+        else:
+            print(f"Using PPO trainer class: {ppo_trainer_class}")
+        trainer = ppo_trainer_class(
             config=config,
             tokenizer=tokenizer,
             processor=processor,
@@ -283,6 +345,7 @@ def run(self, config):
         trainer.init_workers()
         # Start the training process.
         trainer.fit()
+        print(f"Using PPO trainer fit done")
 
 
 def create_rl_dataset(data_paths, data_config, tokenizer, processor, is_train=True):
@@ -348,7 +411,7 @@ def create_rl_sampler(data_config, dataset):
     import torch
     from torch.utils.data import RandomSampler, SequentialSampler
 
-    if data_config.sampler is not None and data_config.sampler.get("class_path", None) is not None:
+    if hasattr(data_config, 'sampler') and data_config.sampler is not None and data_config.sampler.get("class_path", None) is not None:
         curriculum_class = load_extern_type(
             data_config.sampler.class_path,
             data_config.sampler.class_name,
diff --git a/verl/trainer/ppo/pipeline/README.md b/verl/trainer/ppo/pipeline/README.md
new file mode 100644
index 00000000000..b7697ec065f
--- /dev/null
+++ b/verl/trainer/ppo/pipeline/README.md
@@ -0,0 +1,251 @@
+# Async-RL Pipeline
+
+## Overview
+
+The Async-RL Pipeline is a state-of-the-art implementation of asynchronous reinforcement learning training based on a fully decoupled architecture. It separates actor-train, actor-forward-logp, ref_logp, and rollout-generate components to achieve optimal performance and scalability.
+
+## Architecture Principles
+
+### Fully Decoupled Architecture
+The system is built on a fully decoupled RL training architecture where different components operate independently:
+- **Actor-Train**: Handles the main training loop
+- **Actor-Forward-LogP**: Computes log probabilities for the actor
+- **Ref_LogP**: Computes reference log probabilities
+- **Rollout-Generate**: Handles sequence generation and rollout
+
+This decoupling enables true asynchronous operation and eliminates bottlenecks that occur in traditional synchronous RL training.
+
+## Key Features
+
+### 1. State Machine Design
+
+The pipeline implements a sophisticated state machine design where different state transitions correspond to the entire async-RL pipeline workflow:
+
+**Pipeline Components:**
+- `dataloader` → `generate` → `rollout` → `logp` → `ref_logp` → `reward` → `train` → `param_update`
+
+**Design Rationale:**
+RL training workflows are inherently complex. A synchronous trainer can simply execute its tasks one after another, but async-RL has to manage state transitions between tasks that run concurrently. To preserve both performance and accuracy, the system uses flexible scheduling strategies that logically bind tasks to resources. Each task maintains its own produce/consume loop to prevent errors. Modeling every task as a state machine keeps these transitions explicit and manageable.
+
+**Benefits:**
+- Clear separation of concerns
+- Predictable state transitions
+- Error isolation and recovery
+- Resource management optimization
+
+### 2. Asynchronous Parameter Synchronization
+
+The system implements true asynchronous parameter updates by decomposing the parameter synchronization process:
+
+**Traditional Approach:**
+- Used NCCL-based parameter synchronization
+- Limited by NCCL's non-thread-safe nature
+- Could not achieve true asynchrony
+
+**New Implementation:**
+The parameter update process is decomposed into three main components:
+1. **Gather**: Uses NCCL for parameter aggregation (must be serial)
+2. **Send/Recv**: Asynchronous CPU communication
+3. **Load**: Parameter loading without affecting GPU compute
+
+**Benefits:**
+- Enables `generate` vs `param_update` vs `train` asynchrony
+- Preserves GPU compute resources
+- Maintains training accuracy
+- Reuses existing VERL implementation logic
+
+### 3. Arbitrary Granularity Parallelism
+
+The system addresses RL training bottlenecks through intelligent task overlap:
+
+**Problem:**
+RL bottlenecks typically occur in `rollout` or `train` tasks. When any task blocks (e.g., long-tail issues causing generate tasks to pend), all other GPUs idle, significantly reducing training efficiency.
+
+**Solution:**
+- Complete asynchrony decouples train and rollout tasks
+- Allows off-policy training with task overlap
+- Enables optimal performance through intelligent scheduling
+
+**Example Scenarios:**
+
+**Fast Generation Tasks:**
+- Rollout-generate completes quickly while train is still running
+- System can proceed to next round or even n rounds of generate tasks
+- Maintains continuous training flow
+
+**Slow Generation Tasks:**
+- Long generate tasks don't block train operations
+- Train can continue consuming previous rounds' generate results
+- Ensures sustained training progress
+
+**Performance Impact:**
+- Reduces long-tail effects
+- Maintains high Model FLOPs Utilization (MFU)
+- Achieves near-linear scaling (0.9 linearity)
+
+## Performance Results
+
+### Benchmark Configuration
+- **Model**: Red-MoE-16B
+- **Hardware**: 4 machines
+- **Configuration**: TP1 + PP1 + EP4 + SGLang-TP2
+- **Algorithm**: GRPO
+- **Batch Size**: 128
+
+### Performance Improvements
+- **Baseline**: Synchronous RL training
+- **Improvement**: 50% performance increase
+- **Scalability**: Increasing batch size can achieve up to 100% performance improvement
+
+## Pipeline Components
+
+### State Machines
+
+1. **DataloaderStateMachine**
+   - Manages data loading and preprocessing
+   - Ensures proper data flow to downstream components
+
+2. **GenerateStateMachine**
+   - Handles sequence generation
+   - Supports interruptible generation for better resource utilization
+
+3. **RolloutStateMachine**
+   - Orchestrates the rollout process
+   - Manages data flow between components
+
+4. **LogPStateMachine**
+   - Computes log probabilities for the actor
+   - Ensures proper resource locking
+
+5. **RefLogPStateMachine**
+   - Computes reference log probabilities
+   - Supports reference policy evaluation
+
+6. **RewardStateMachine**
+   - Calculates rewards and advantages
+   - Handles reward model integration
+
+7. **TrainStateMachine**
+   - Manages the main training loop
+   - Coordinates all training operations
+
+8. **ParamUpdateStateMachine**
+   - Handles asynchronous parameter updates
+   - Manages parameter synchronization
+
+### Pipeline Flow
+
+```
+dataloader → generate → rollout → logp/ref_logp → reward → train → param_update
+    ↓           ↓         ↓           ↓           ↓        ↓         ↓
+  Data      Sequence   Process    Compute    Calculate  Update   Sync
+Loading   Generation   Rollout   Log Probs   Rewards   Model    Params
+```
+
+## Configuration
+
+### Key Parameters
+
+```bash
+# Async RL Configuration
++actor_rollout_ref.async_pipeline=True \
+ 
+# Resource Management
++trainer.use_nodes_ratios=[0.5,0.5,0.5,0.5] \
+# Meaning: train/logp/ref_logp use 0.5 × ngpus, and generate uses 0.5 × ngpus
+ 
+# Performance Tuning, enable async-param-update
++actor_rollout_ref.rollout.enable_dual_buffer=True \
+# Bucket size (MB) in which the actor training node sends parameters during a parameter update
++actor_rollout_ref.rollout.param_update_preduce_bucket_size_mb=512 \
+# Bucket size (MB) in which the rollout inference node receives parameters; too large a value will cause GPU OOM
++actor_rollout_ref.rollout.param_update_consume_bucket_size_mb=128 \
+ 
+# Off-policy granularity: 2 means generate may run 2 steps ahead of the train node, i.e. one-step off-policy
++trainer.generate_ahead_steps=2 \
+```
+
+## Usage
+
+### Basic Usage
+
+```python
+from verl.trainer.ppo.pipeline import AsyncTrainingFlow
+
+# Initialize the training flow
+flow = AsyncTrainingFlow(
+    trainer=trainer,
+    enable_async_rl=True,
+)
+
+# Run the async training
+await flow.run()
+```
+
+### Advanced Configuration
+
+```python
+# Custom state machine creation
+# Subclass the state machine base class to implement your own state machine, then insert it into the async pipeline flow (AsyncTrainingFlow)
+from verl.trainer.ppo.pipeline import create_role_state_machine
+
+state_machine = create_role_state_machine(
+    role_name="train",
+    pipeline=pipeline,
+    trainer=trainer,
+    use_async_rl=True
+)
+```
+
+## Future Enhancements (TODO)
+
+### 1. Validation Asynchronous Support
+- **Status**: Currently disabled
+- **Plan**: Add validation state machine
+- **Integration**: Interleave with dataloader and generate flows
+- **Goal**: Create parallel data streams for training and validation
+
+### 2. Critic Asynchronous Support
+- **Status**: Limited to GRPO support
+- **Plan**: Extend to other algorithms
+- **Goal**: Full critic component asynchrony
+
+### 3. LogP Asynchronous Support
+- **Status**: Partially implemented
+- **Plan**: Complete logp to train recv+load operations
+- **Goal**: Full logp component asynchrony
+
+### 4. Off-Policy Monitoring
+- **Status**: Not implemented
+- **Plan**: Monitor off-policy lag steps
+- **Goal**: Track param_update lag behind actor train-step
+- **Metrics**: Monitor generate param_update vs actor train-step difference
+
+## Technical Details
+
+### Resource Management
+
+The system implements sophisticated resource management through:
+- **Resource Locking**: Prevents resource contention between components
+- **Task Scheduling**: Intelligent task overlap and scheduling
+- **Memory Management**: Efficient memory usage and cleanup
+
+### Error Handling
+
+- **State Machine Error Recovery**: Each state machine handles its own errors
+- **Pipeline Resilience**: System continues operation even if individual components fail
+- **Graceful Degradation**: Falls back to synchronous mode if needed
+
+## Contributing
+
+When contributing to the async-RL pipeline:
+
+1. **State Machine Design**: Follow the established state machine patterns
+2. **Resource Management**: Ensure proper resource locking and cleanup
+3. **Performance**: Consider the impact on overall pipeline performance
+4. **Testing**: Test both synchronous and asynchronous modes
+5. **Documentation**: Update this README for any new features
+
+---
+
+For more detailed information about specific components, please refer to the individual module documentation. 
\ No newline at end of file
diff --git a/verl/trainer/ppo/pipeline/__init__.py b/verl/trainer/ppo/pipeline/__init__.py
new file mode 100644
index 00000000000..bb9de0e5ad1
--- /dev/null
+++ b/verl/trainer/ppo/pipeline/__init__.py
@@ -0,0 +1,71 @@
+"""
+Pipeline module for PPO training with async pipeline support.
+"""
+
+from .pipeline_utils import (
+    AsyncPipeline,
+    enhanced_print,
+    PIPELINE_END_SIGNAL,
+    PIPELINE_START_SINGLE,
+    ROLE_COLORS,
+)
+
+from .state_machine import (
+    RoleState,
+    RoleEvent,
+    BaseRoleStateMachine,
+    AsyncTrainingFlow,
+)
+
+from .state_machine_impl import (
+    DataloaderStateMachine,
+    TrainStateMachine,
+    RolloutStateMachine,
+    RewardStateMachine,
+    LogPStateMachine,
+    RefLogPStateMachine,
+    ParamUpdateStateMachine,
+    GenerateStateMachine,
+    create_role_state_machine,
+)
+
+from .utils import (
+    ResourceLock,
+    TimingStatsCollector,
+    timing_decorator,
+    resource_lock,
+    global_timing_collector,
+)
+
+__all__ = [
+    # Pipeline utility classes
+    "AsyncPipeline",
+    "enhanced_print", 
+    "PIPELINE_END_SIGNAL",
+    "PIPELINE_START_SINGLE",
+    "ROLE_COLORS",
+    
+    # State Machine basic framework
+    "RoleState",
+    "RoleEvent", 
+    "BaseRoleStateMachine",
+    "AsyncTrainingFlow",
+    
+    # State Machine implementation
+    "DataloaderStateMachine",
+    "TrainStateMachine",
+    "RolloutStateMachine",
+    "RewardStateMachine",
+    "LogPStateMachine",
+    "RefLogPStateMachine",
+    "ParamUpdateStateMachine",
+    "GenerateStateMachine",
+    "create_role_state_machine",
+    
+    # Utility classes
+    "ResourceLock",
+    "TimingStatsCollector",
+    "timing_decorator",
+    "resource_lock",
+    "global_timing_collector",
+] 
\ No newline at end of file
diff --git a/verl/trainer/ppo/pipeline/pipeline_utils.py b/verl/trainer/ppo/pipeline/pipeline_utils.py
new file mode 100644
index 00000000000..ff9b225987e
--- /dev/null
+++ b/verl/trainer/ppo/pipeline/pipeline_utils.py
@@ -0,0 +1,572 @@
+"""
+Asynchronous Pipeline management class, responsible for data flow and queue management between roles
+
+Performance optimization summary:
+1. Original problem: Ray queue's put_async/get_async takes too long (40+ seconds)
+2. Root cause: Internal locks and network transmission bottlenecks in Ray queue in distributed environment
+3. Solution: Use DIRECT_OBJECT_STORE mode, bypass Ray queue
+4. Optimization effect: ray.put/ray.get takes milliseconds (0.000s)
+5. Current bottleneck: Model parameter update itself (22-35 seconds), not data transmission
+
+Transfer mode comparison:
+- RAY_QUEUE: Original Ray queue mode, performance bottleneck
+- RAY_QUEUE_COMPRESSED: Ray queue + compression, performance slightly improved
+- RAY_QUEUE_OPTIMIZED: Ray queue + ray.put optimization, performance significantly improved
+- DIRECT_OBJECT_STORE: Direct use of object store, optimal performance
+- HYBRID: Hybrid mode, automatically select based on data size
+"""
+
+import asyncio
+import time
+import pickle
+from collections import OrderedDict
+import ray
+from ray.util.queue import Queue
+from colorama import Fore, Style
+from enum import Enum
+
+# Transfer modes understood by AsyncPipeline; each one selects a branch in AsyncPipeline.push().
+class TransferMode(Enum):
+    RAY_QUEUE = "ray.queue"                    # Ray queue; push() wraps the payload via _compress_data first
+    RAY_QUEUE_COMPRESSED = "ray.queue.compressed"  # Ray queue + compression (same push() steps as RAY_QUEUE)
+    RAY_QUEUE_OPTIMIZED = "ray.queue.optimized"    # ray.put() the payload, then pass the ObjectRef to the queue
+    DIRECT_OBJECT_STORE = "direct.object.store"    # ray.put() refs kept in a per-pair list; no Ray queue involved
+    HYBRID = "hybrid"                          # Meant to choose by data size; NOTE(review): push() has no size check and uses the queue path
+
+# String constants used as pipeline signals. NOTE(review): "SINGLE" in PIPELINE_START_SINGLE looks like a typo of "SIGNAL"; the name is exported from the package, so confirm before renaming.
+PIPELINE_END_SIGNAL = "__PIPELINE_END_SIGNAL__"
+PIPELINE_START_SINGLE = "__PIPELINE_START_SINGLE__"
+
+# Foreground color enhanced_print() applies to each role's tag (roles not listed fall back to Fore.WHITE)
+ROLE_COLORS = {
+    "dataloader": Fore.WHITE,
+    "rollout": Fore.BLUE,
+    "generate": Fore.YELLOW,
+    "train": Fore.GREEN,
+    "reward": Fore.WHITE,
+    "logp": Fore.MAGENTA,
+    "ref_logp": Fore.CYAN,
+    "param_update": Fore.RED,
+}
+
+def enhanced_print(src_role, dst_role, message):
+    """Print ``message`` behind a timestamp and the colored, padded ``src_role`` tag.
+
+    ``dst_role`` is accepted but does not appear in the output.
+    """
+    # Pad every tag to the longest known role name so the log columns line up.
+    pad_width = max(len(role_name) for role_name in ROLE_COLORS)
+    role_tag = f"{src_role:<{pad_width}}"
+    tag_color = ROLE_COLORS.get(src_role, Fore.WHITE)
+    now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+    line = f"[{now}] {tag_color}[{role_tag}]{Style.RESET_ALL} {message}"
+    print(line)
+
+
+def auto_register_queue(func):
+    """Decorator for async ``(self, src_role, dst_role, ...)`` methods: register the pair's queue on first use."""
+    from functools import wraps  # local import so this block does not depend on the module's import list
+    async def wrapper(self, src_role, dst_role, *args, **kwargs):
+        if not self.is_queue_registered(src_role, dst_role):
+            self.register_queue(src_role, dst_role)
+        return await func(self, src_role, dst_role, *args, **kwargs)
+    return wraps(func)(wrapper)  # keep func's __name__/__doc__ visible on the decorated method
+
+def auto_register_queue_sync(func):
+    """Decorator for sync ``(self, src_role, dst_role, ...)`` methods: register the pair's queue on first use."""
+    from functools import wraps  # local import so this block does not depend on the module's import list
+    def wrapper(self, src_role, dst_role, *args, **kwargs):
+        if not self.is_queue_registered(src_role, dst_role):
+            self.register_queue(src_role, dst_role)
+        return func(self, src_role, dst_role, *args, **kwargs)
+    return wraps(func)(wrapper)  # keep func's __name__/__doc__ visible on the decorated method
+
+
+class AsyncPipeline:
+    """Asynchronous Pipeline management class, responsible for data flow and queue management between roles"""
+    
+    def __init__(self, max_queue_size=1, transfer_mode=TransferMode.RAY_QUEUE_OPTIMIZED):
+        """
+        Create an empty pipeline; per-pair channels are added later by register_queue().
+
+        Args:
+            max_queue_size: ``maxsize`` passed to every Ray ``Queue`` this pipeline creates
+            transfer_mode: ``TransferMode`` member that decides which backing stores exist
+
+        Raises:
+            ValueError: if ``transfer_mode`` matches none of the handled modes
+        """
+        self.max_queue_size = max_queue_size
+        self.transfer_mode = transfer_mode
+
+        # Role names known up front; register_queue() may add more
+        self.role = set((
+            "dataloader", "rollout", "train", "generate",
+            "param_update", "reward", "logp", "ref_logp",
+        ))
+
+        enhanced_print("pipeline", None, f"AsyncPipeline initialized: max_queue_size={max_queue_size}, transfer_mode={transfer_mode.value}")
+
+        # Work out which of the two backing stores this mode needs
+        wants_queue = transfer_mode in (
+            TransferMode.RAY_QUEUE,
+            TransferMode.RAY_QUEUE_COMPRESSED,
+            TransferMode.RAY_QUEUE_OPTIMIZED,
+            TransferMode.HYBRID,
+        )
+
+        wants_store = transfer_mode in (
+            TransferMode.DIRECT_OBJECT_STORE,
+            TransferMode.HYBRID,
+        )
+        if not (wants_queue or wants_store):
+            raise ValueError(f"Unsupported transfer mode: {transfer_mode}")
+
+        # A store this mode does not use is left as None rather than an empty mapping
+        self._queue_pairs = OrderedDict() if wants_queue else None
+        self._object_store_pairs = OrderedDict() if wants_store else None
+    
+    def register_queue(self, src_role, dst_role):
+        """Create the ``{src_role}_to_{dst_role}`` channel for the active transfer mode (no-op if it already exists).
+
+        Queue-based modes get a Ray ``Queue``; DIRECT_OBJECT_STORE gets a list of object refs;
+        HYBRID gets both, because push() sends HYBRID traffic through the queue path.
+        """
+        use_queue_name = f"{src_role}_to_{dst_role}"
+        # Track both endpoints so is_in_pipeline() recognizes dynamically added roles
+        self.role.update((src_role, dst_role))
+
+        # HYBRID needs a queue too: push() routes it through the queue branch and asserts the queue exists
+        if self.transfer_mode in [TransferMode.RAY_QUEUE, TransferMode.RAY_QUEUE_COMPRESSED, TransferMode.RAY_QUEUE_OPTIMIZED, TransferMode.HYBRID]:
+            if use_queue_name not in self._queue_pairs:
+                self._queue_pairs[use_queue_name] = Queue(maxsize=self.max_queue_size)
+                enhanced_print("pipeline", None, f"Registered queue: {use_queue_name} with maxsize={self.max_queue_size}")
+        if self.transfer_mode in [TransferMode.DIRECT_OBJECT_STORE, TransferMode.HYBRID]:
+            if use_queue_name not in self._object_store_pairs:
+                self._object_store_pairs[use_queue_name] = []
+                enhanced_print("pipeline", None, f"Registered object store pair: {use_queue_name}")
+    
+    def is_queue_registered(self, src_role, dst_role):
+        """Return True when the ``{src_role}_to_{dst_role}`` channel exists for the active transfer mode."""
+        channel = f"{src_role}_to_{dst_role}"
+        if self.transfer_mode == TransferMode.HYBRID:
+            # Hybrid counts a channel as registered if either backing store knows it
+            return channel in self._queue_pairs or channel in self._object_store_pairs
+        if self.transfer_mode == TransferMode.DIRECT_OBJECT_STORE:
+            return channel in self._object_store_pairs
+        if self.transfer_mode in (TransferMode.RAY_QUEUE, TransferMode.RAY_QUEUE_COMPRESSED, TransferMode.RAY_QUEUE_OPTIMIZED):
+            return channel in self._queue_pairs
+        return False
+    
+    def is_in_pipeline(self, role):
+        """Return True if ``role`` is in ``self.role`` (the built-in names plus any added by register_queue)."""
+        return role in self.role
+    
+    def is_complete(self, src_role, dst_role):
+        """
+        Report whether the ``{src_role}_to_{dst_role}`` stage has drained, judged from the Ray queues.
+
+        True only when all of the following hold:
+        1. Every queue registered before the current one is empty
+           (``rollout_to_dataloader`` and ``dataloader_to_rollout`` are not inspected here)
+        2. The current queue is empty
+        3. The ``dataloader_to_rollout`` queue exists and is not empty
+
+        Raises:
+            ValueError: if the current queue has not been registered
+        """
+        use_queue_name = f"{src_role}_to_{dst_role}"
+        # _queue_pairs is None in DIRECT_OBJECT_STORE mode; treat that as "no queues registered"
+        queues = self._queue_pairs if self._queue_pairs is not None else {}
+        cur_pipeline_queue = queues.get(use_queue_name, None)
+        if cur_pipeline_queue is None:
+            raise ValueError(f"Queue {use_queue_name} not found in pipeline queues: {queues.keys()}")
+        first_queue_name = "rollout_to_dataloader"
+        last_queue_name = "dataloader_to_rollout"
+        last_pipeline_queue = queues.get(last_queue_name)
+
+        # Walk queues in registration order; skip the two boundary queues, stop at the current one
+        for key, queue in queues.items():
+            if key in (first_queue_name, last_queue_name):
+                continue
+            if key == use_queue_name:
+                break
+            if not queue.empty():
+                return False
+
+        # A missing last queue holds nothing, so the "last queue is not empty" condition cannot be met
+        if last_pipeline_queue is None or not cur_pipeline_queue.empty() or last_pipeline_queue.empty():
+            return False
+        print(f"[{src_role}] to [{dst_role}] queue is empty: {use_queue_name}")
+        return True
+    
+    def get_queue_size(self, src_role, dst_role):
+        """
+        Return how many items are waiting on the ``{src_role}_to_{dst_role}`` channel.
+
+        Returns:
+            ``qsize()`` of the Ray queue, or the length of the object-ref list, depending on the
+            transfer mode; 0 when the channel is unregistered or the mode is unrecognized.
+        """
+        channel = f"{src_role}_to_{dst_role}"
+        mode = self.transfer_mode
+        queue_modes = (TransferMode.RAY_QUEUE, TransferMode.RAY_QUEUE_COMPRESSED, TransferMode.RAY_QUEUE_OPTIMIZED)
+        # Which backing stores to consult, in priority order (hybrid prefers the queue)
+        if mode in queue_modes:
+            check_queue, check_store = True, False
+        elif mode == TransferMode.DIRECT_OBJECT_STORE:
+            check_queue, check_store = False, True
+        elif mode == TransferMode.HYBRID:
+            check_queue, check_store = True, True
+        else:
+            return 0
+
+        if check_queue and channel in self._queue_pairs:
+            return self._queue_pairs[channel].qsize()
+        if check_store and channel in self._object_store_pairs:
+            return len(self._object_store_pairs[channel])
+        return 0
+    
+    @auto_register_queue
+    async def push(self, src_role, dst_role, data, debug_log=False):
+        """Send ``data`` over the ``{src_role}_to_{dst_role}`` channel per ``self.transfer_mode``; returns True. ``debug_log`` only gates the diagnostic prints."""
+        use_queue_name = f"{src_role}_to_{dst_role}"
+        if debug_log:
+            enhanced_print(src_role, dst_role, f"[{src_role}] Pushing data to [{dst_role}] queue: {use_queue_name}")
+
+        # Node id is only used in the slow-transfer warnings below
+        current_node_id = str(ray.get_runtime_context().node_id)
+        
+        # Record start time
+        start_time = time.time()
+
+        if self.transfer_mode == TransferMode.DIRECT_OBJECT_STORE:
+            # Bypass queue, directly use object store
+            if use_queue_name not in self._object_store_pairs:
+                raise ValueError(f"Object store pair {use_queue_name} not found")
+            
+            # Directly use ray.put, bypass queue
+            ray_put_start = time.time()
+            object_ref = ray.put(data)
+            ray_put_time = time.time() - ray_put_start
+            
+            # Append the ref to this pair's list; pull() pops from the front of the same list
+            self._object_store_pairs[use_queue_name].append(object_ref)
+            
+            total_time = time.time() - start_time
+            
+            # Record performance analysis
+            if debug_log and total_time > 0.1:
+                enhanced_print(src_role, dst_role, f"DIRECT OBJECT STORE: {use_queue_name} ray_put={ray_put_time:.3f}s, total={total_time:.3f}s, refs_count={len(self._object_store_pairs[use_queue_name])}")
+            
+            if debug_log and total_time > 1.0:
+                enhanced_print(src_role, dst_role, f"⚠️ SLOW DIRECT PUT: {use_queue_name} took {total_time:.2f}s (ray_put: {ray_put_time:.2f}s) on node={current_node_id[:8]}")
+        else:
+            # Every other mode (including HYBRID) goes through a Ray queue
+            cur_pipeline_queue = self._queue_pairs.get(use_queue_name, None)
+            assert cur_pipeline_queue is not None, f"Queue {use_queue_name} not found in pipeline queues: {self._queue_pairs.keys()}"
+            
+            # Queue size before the put, reported in the debug output only
+            queue_size_before = cur_pipeline_queue.qsize()
+            
+            # Choose transfer method
+            if self.transfer_mode == TransferMode.RAY_QUEUE_OPTIMIZED:
+                # Use ray.put optimization: only transfer object reference
+                ray_put_start = time.time()
+                object_ref = ray.put(data)
+                ray_put_time = time.time() - ray_put_start
+                
+                # NOTE(review): Ray resolves ObjectRefs passed as top-level remote-call arguments; confirm the queue stores the ref, not the value
+                queue_put_start = time.time()
+                await cur_pipeline_queue.put_async(object_ref)
+                queue_put_time = time.time() - queue_put_start
+                
+                total_time = time.time() - start_time
+                queue_size_after = cur_pipeline_queue.qsize()
+                
+                # Record performance analysis
+                if debug_log and total_time > 0.1:
+                    enhanced_print(src_role, dst_role, f"RAY_PUT OPTIMIZATION: {use_queue_name} ray_put={ray_put_time:.3f}s, queue_put={queue_put_time:.3f}s, total={total_time:.3f}s, queue={queue_size_before}->{queue_size_after}, node={current_node_id[:8]}")
+                
+                if debug_log and total_time > 1.0:
+                    enhanced_print(src_role, dst_role, f"⚠️ SLOW RAY_PUT: {use_queue_name} took {total_time:.2f}s (ray_put: {ray_put_time:.2f}s, queue_put: {queue_put_time:.2f}s) on node={current_node_id[:8]}")
+            elif self.transfer_mode == TransferMode.RAY_QUEUE_COMPRESSED:
+                # Original method: compression + queue transfer
+                compress_start = time.time()
+                
+                # Always compress for compressed mode
+                optimized_data = self._compress_data(data)
+
+                # Record compression completion time
+                compress_time = time.time() - compress_start
+                put_start = time.time()
+                
+                # Key analysis: put_async performance
+                await cur_pipeline_queue.put_async(optimized_data)
+                
+                # Record put_async completion time
+                put_time = time.time() - put_start
+                
+                total_time = time.time() - compress_start
+                queue_size_after = cur_pipeline_queue.qsize()
+                
+                # Detailed analysis of put_async performance, including node information
+                if debug_log and put_time > 0.1:  # Record if over 100ms
+                    enhanced_print(src_role, dst_role, f"PUT_ASYNC ANALYSIS: {use_queue_name} put_async={put_time:.3f}s, compress={compress_time:.3f}s, total={total_time:.3f}s, queue={queue_size_before}->{queue_size_after}")
+                
+                if debug_log and total_time > 1.0:
+                    enhanced_print(src_role, dst_role, f"⚠️ SLOW PUSH: {use_queue_name} took {total_time:.2f}s (put_async: {put_time:.2f}s, compress: {compress_time:.2f}s) on node={current_node_id[:8]}")
+            else: # RAY_QUEUE mode (HYBRID also lands here); NOTE(review): body duplicates the RAY_QUEUE_COMPRESSED branch
+                # Original method: compression + queue transfer
+                compress_start = time.time()
+                # Always compress for ray queue mode
+                optimized_data = self._compress_data(data)
+                
+                compress_time = time.time() - compress_start
+                put_start = time.time()
+                
+                await cur_pipeline_queue.put_async(optimized_data)
+                
+                put_time = time.time() - put_start
+                
+                total_time = time.time() - compress_start
+                queue_size_after = cur_pipeline_queue.qsize()
+                
+                if debug_log and put_time > 0.1:
+                    enhanced_print(src_role, dst_role, f"PUT_ASYNC ANALYSIS: {use_queue_name} put_async={put_time:.3f}s, compress={compress_time:.3f}s, total={total_time:.3f}s, queue={queue_size_before}->{queue_size_after}")
+                
+                if debug_log and total_time > 1.0:
+                    enhanced_print(src_role, dst_role, f"⚠️ SLOW PUSH: {use_queue_name} took {total_time:.2f}s (put_async: {put_time:.2f}s, compress: {compress_time:.2f}s) on node={current_node_id[:8]}")
+        
+        return True
+    
+    def _optimize_data_for_transfer(self, data):
+        """Return ``data`` in the envelope produced by ``_compress_data`` (thin pass-through helper)."""
+        # No mode check happens here: every call is forwarded to _compress_data
+        return self._compress_data(data)
+
+    
+    def _compress_data(self, data):
+        """
+        Pickle ``data`` and try to shrink it with lz4.frame (zlib when lz4 is not installed).
+
+        Returns:
+            ``{"compressed": True, "data": <bytes>, "compression_lib": <module name>}`` when the
+            compressed payload is under 90% of the pickled size; otherwise (or on any error)
+            ``{"compressed": False, "data": data}`` carrying the original object.
+        """
+        try:
+            try:
+                import lz4.frame as codec
+            except ImportError:
+                # lz4 is optional; zlib is always available
+                import zlib as codec
+
+            raw = pickle.dumps(data)
+            packed = codec.compress(raw)
+            ratio = len(packed) / len(raw)
+            if ratio >= 0.9:
+                # Less than 10% saved: not worth it, hand back the object untouched
+                return {"compressed": False, "data": data}
+
+            enhanced_print("pipeline", None, f"Data compressed: {len(raw)} -> {len(packed)} bytes (ratio: {ratio:.2f}) using {codec.__name__}")
+            return {"compressed": True, "data": packed, "compression_lib": codec.__name__}
+        except Exception as e:
+            enhanced_print("pipeline", None, f"Compression failed: {e}")
+            return {"compressed": False, "data": data}
+
+    def _estimate_data_size_pickle(self, data):
+        """
+        Estimate the size of ``data`` in MB as the length of its pickle.
+
+        Serializes the whole object, so call it sparingly. Returns 0.0 if pickling fails.
+        """
+        try:
+            pickled_len = len(pickle.dumps(data))
+        except Exception as e:
+            enhanced_print("pipeline", None, f"Failed to estimate data size with pickle: {e}")
+            return 0.0
+        return pickled_len / (1024 * 1024)
+
+
+    @auto_register_queue
+    async def pull(self, src_role, dst_role, debug_log=False):
+        """Pull data from queue - support bypassing queue to directly use object store"""
+        use_queue_name = f"{src_role}_to_{dst_role}"
+        if debug_log:
+            enhanced_print(dst_role, src_role, f"[{dst_role}] Pulling data from [{src_role}] queue: {use_queue_name}")
+
+        start_time = time.time()
+        
+        if self.transfer_mode == TransferMode.DIRECT_OBJECT_STORE:
+            # Bypass queue, directly get from object store
+            if use_queue_name not in self._object_store_pairs:
+                raise ValueError(f"Object store pair {use_queue_name} not found")
+            
+            # Wait for available object ref
+            while not self._object_store_pairs[use_queue_name]:
+                await asyncio.sleep(0.1)  # 100ms wait
+            
+            # Get first object ref
+            object_ref = self._object_store_pairs[use_queue_name].pop(0)
+            
+            # Get data from object store
+            ray_get_start = time.time()
+            result = ray.get(object_ref)
+            ray_get_time = time.time() - ray_get_start
+            
+            total_time = time.time() - start_time
+            
+            # Record performance analysis
+            if total_time > 0.1:
+                enhanced_print(dst_role, src_role, f"DIRECT OBJECT STORE GET: {use_queue_name} ray_get={ray_get_time:.3f}s, total={total_time:.3f}s, remaining_refs={len(self._object_store_pairs[use_queue_name])}")
+            
+            if total_time > 1.0:
+                enhanced_print(dst_role, src_role, f"⚠️ SLOW DIRECT GET: {use_queue_name} took {total_time:.2f}s (ray_get: {ray_get_time:.2f}s)")
+        else:
+            # Use Ray queue
+            cur_pipeline_queue = self._queue_pairs.get(use_queue_name, None)
+            assert cur_pipeline_queue is not None, f"Queue {use_queue_name} not found in pipeline queues: {self._queue_pairs.keys()}"
+            
+            # Get data from queue
+            data = await cur_pipeline_queue.get_async()
+            
+            # Record queue get time
+            queue_get_time = time.time() - start_time
+            
+            # Choose processing method
+            if self.transfer_mode == TransferMode.RAY_QUEUE_OPTIMIZED:
+                # Use ray.put optimization: get data from object reference
+                ray_get_start = time.time()
+                
+                # Check if it's object reference
+                if hasattr(data, '_ray_object_ref'):  # Ray object reference
+                    result = ray.get(data)
+                else:
+                    result = data
+                
+                ray_get_time = time.time() - ray_get_start
+                total_time = time.time() - start_time
+                
+                # Record performance analysis
+                if total_time > 0.1:
+                    enhanced_print(dst_role, src_role, f"RAY_GET OPTIMIZATION: {use_queue_name} queue_get={queue_get_time:.3f}s, ray_get={ray_get_time:.3f}s, total={total_time:.3f}s")
+                
+                if total_time > 1.0:
+                    enhanced_print(dst_role, src_role, f"⚠️ SLOW RAY_GET: {use_queue_name} took {total_time:.2f}s (queue_get: {queue_get_time:.2f}s, ray_get: {ray_get_time:.2f}s)")
+            elif self.transfer_mode == TransferMode.RAY_QUEUE_COMPRESSED:
+                # Original method: decompression processing
+                get_time = queue_get_time
+                decompress_start = time.time()
+                
+                # Process compressed data
+                if isinstance(data, dict) and "compressed" in data:
+                    if data["compressed"]:
+                        # Decompress data
+                        compression_lib = data.get("compression_lib", "lz4.frame")
+                        result = self._decompress_data(data["data"], compression_lib)
+                    else:
+                        # Uncompressed data
+                        result = data["data"]
+                else:
+                    # Original data
+                    result = data
+
+                decompress_time = time.time() - decompress_start
+                total_time = time.time() - start_time
+                
+                # Detailed analysis of get_async performance
+                if get_time > 0.1:  # Record if over 100ms
+                    enhanced_print(dst_role, src_role, f"GET_ASYNC ANALYSIS: {use_queue_name} get_async={get_time:.3f}s, decompress={decompress_time:.3f}s, total={total_time:.3f}s")
+                
+                # Print warning if total time exceeds 1 second
+                if total_time > 1.0:
+                    enhanced_print(dst_role, src_role, f"⚠️ SLOW PULL: {use_queue_name} took {total_time:.2f}s (get_async: {get_time:.2f}s, decompress: {decompress_time:.2f}s)")
+            else: # RAY_QUEUE mode
+                # Original method: decompression processing
+                get_time = queue_get_time
+                decompress_start = time.time()
+                
+                # Process compressed data
+                if isinstance(data, dict) and "compressed" in data:
+                    if data["compressed"]:
+                        # Decompress data
+                        compression_lib = data.get("compression_lib", "lz4.frame")
+                        result = self._decompress_data(data["data"], compression_lib)
+                    else:
+                        # Uncompressed data
+                        result = data["data"]
+                else:
+                    # Original data
+                    result = data
+                
+                decompress_time = time.time() - decompress_start
+                total_time = time.time() - start_time
+                
+                # Detailed analysis of get_async performance
+                if get_time > 0.1:  # Record if over 100ms
+                    enhanced_print(dst_role, src_role, f"GET_ASYNC ANALYSIS: {use_queue_name} get_async={get_time:.3f}s, decompress={decompress_time:.3f}s, total={total_time:.3f}s")
+                
+                # Print warning if total time exceeds 1 second
+                if total_time > 1.0:
+                    enhanced_print(dst_role, src_role, f"⚠️ SLOW PULL: {use_queue_name} took {total_time:.2f}s (get_async: {get_time:.2f}s, decompress: {decompress_time:.2f}s)")
+        
+        return result
+    
+    def _decompress_data(self, compressed_data, compression_lib_name="lz4.frame"):
+        """Decompress and unpickle a payload pulled from a pipeline queue.
+
+        Args:
+            compressed_data: Compressed bytes of a pickled object.
+            compression_lib_name: Codec label; "lz4.frame" selects lz4 (zlib
+                when lz4 is not importable), any other value selects zlib.
+
+        Returns:
+            The unpickled object, or None if any step fails (logged, not raised).
+        """
+        try:
+            import zlib
+            compression_lib = zlib
+            if compression_lib_name == "lz4.frame":
+                try:
+                    import lz4.frame
+                    compression_lib = lz4.frame
+                except ImportError:
+                    pass  # lz4 is optional; keep the zlib fallback
+
+            # Both codecs expose decompress(bytes), so no per-library branch is needed.
+            decompressed = compression_lib.decompress(compressed_data)
+
+            # SECURITY: pickle.loads can run arbitrary code; only trusted peers may send.
+            data = pickle.loads(decompressed)
+
+            enhanced_print("pipeline", None, f"Data decompressed: {len(compressed_data)} -> {len(decompressed)} bytes using {compression_lib.__name__}")
+            return data
+        except Exception as e:
+            enhanced_print("pipeline", None, f"Decompression failed: {e}")
+            return None
+
+    @auto_register_queue_sync
+    def get_cur_queue(self, src_role, dst_role):
+        """Return the transport container registered for src_role -> dst_role.
+
+        Queue modes return the Ray queue, DIRECT_OBJECT_STORE the object store list,
+        HYBRID whichever is registered (queue first); otherwise ValueError is raised.
+        """
+        use_queue_name = f"{src_role}_to_{dst_role}"
+        mode = self.transfer_mode
+        queue_modes = [TransferMode.RAY_QUEUE, TransferMode.RAY_QUEUE_COMPRESSED, TransferMode.RAY_QUEUE_OPTIMIZED]
+        if mode in queue_modes:
+            if use_queue_name in self._queue_pairs:
+                return self._queue_pairs[use_queue_name]
+            raise ValueError(f"Queue {use_queue_name} not found in pipeline queues: {self._queue_pairs.keys()}")
+        if mode == TransferMode.DIRECT_OBJECT_STORE:
+            if use_queue_name in self._object_store_pairs:
+                return self._object_store_pairs[use_queue_name]
+            raise ValueError(f"Object store pair {use_queue_name} not found in object store pairs: {self._object_store_pairs.keys()}")
+        if mode == TransferMode.HYBRID:
+            for registry in (self._queue_pairs, self._object_store_pairs):
+                if use_queue_name in registry:
+                    return registry[use_queue_name]
+            raise ValueError(f"Neither queue nor object store pair {use_queue_name} found")
+        raise ValueError(f"Unsupported transfer mode: {self.transfer_mode}")
\ No newline at end of file
diff --git a/verl/trainer/ppo/pipeline/state_machine.py b/verl/trainer/ppo/pipeline/state_machine.py
new file mode 100644
index 00000000000..1aad4b98cca
--- /dev/null
+++ b/verl/trainer/ppo/pipeline/state_machine.py
@@ -0,0 +1,362 @@
+"""
+State machine implementation for async pipeline roles.
+
+This module provides the base state machine framework and common utilities.
+The concrete role state machine implementations are in state_machine_impl.py.
+"""
+
+import asyncio
+import time
+import ray
+from enum import Enum, auto
+from typing import Optional, Any, Dict, List
+from abc import ABC, abstractmethod
+
+from .pipeline_utils import enhanced_print
+
+"""
+State Machine Design:
+
+IDLE
+ └── START ──> WAITING_INPUT
+                  ├── RECEIVE_DATA ──> PROCESSING
+                  │                      ├── PROCESS_COMPLETE ──> WAITING_OUTPUT
+                  │                      └── ERROR ──> ERROR
+                  ├── STOP ──> DONE
+                  └── ERROR ──> ERROR
+WAITING_OUTPUT
+ ├── SEND_DATA ──> WAITING_INPUT
+ └── ERROR ──> ERROR
+ERROR
+ └── START ──> WAITING_INPUT
+DONE
+ └── START ──> WAITING_INPUT
+"""
+
+
+class RoleState(Enum):
+    """States a role state machine can be in."""
+    IDLE = 1            # initial state, before START
+    WAITING_INPUT = 2   # waiting for input data
+    PROCESSING = 3      # processing the current data
+    WAITING_OUTPUT = 4  # waiting to send the output
+    DONE = 5            # finished
+    ERROR = 6           # error state
+
+
+class RoleEvent(Enum):
+    """Events that trigger state transitions."""
+    START = 1             # start (or restart) the role
+    RECEIVE_DATA = 2      # input data received
+    PROCESS_COMPLETE = 3  # processing finished
+    SEND_DATA = 4         # output data sent
+    ERROR = 5             # an error occurred
+    STOP = 6              # stop requested
+
+
+class BaseRoleStateMachine(ABC):
+    """Base class for the per-role state machines of the async pipeline.
+
+    Subclasses supply get_input_data / process_data / send_output_data; step()
+    calls the one matching the current state and moves on only when it reports
+    success, so a phase that yields None/False is simply retried.
+    """
+
+    def __init__(self, role_name: str, pipeline):
+        """
+        Initialize state machine
+
+        Args:
+            role_name: Role name
+            pipeline: AsyncPipeline instance
+        """
+        self.role_name = role_name
+        self.pipeline = pipeline
+        self.state = RoleState.IDLE
+        self.current_data = None  # item carried from one phase to the next
+        self.error_message = None
+        self.metrics = {}
+
+        # Allowed moves as state -> {event: next state}; anything else is rejected.
+        # NOTE(review): the design sketch above also lists WAITING_INPUT --ERROR-->
+        # ERROR, which has no entry here - confirm which one is intended.
+        self.transitions = {
+            RoleState.IDLE: {
+                RoleEvent.START: RoleState.WAITING_INPUT,
+            },
+            RoleState.WAITING_INPUT: {
+                RoleEvent.RECEIVE_DATA: RoleState.PROCESSING,
+                RoleEvent.STOP: RoleState.DONE,
+            },
+            RoleState.PROCESSING: {
+                RoleEvent.PROCESS_COMPLETE: RoleState.WAITING_OUTPUT,
+                RoleEvent.ERROR: RoleState.ERROR,
+            },
+            RoleState.WAITING_OUTPUT: {
+                RoleEvent.SEND_DATA: RoleState.WAITING_INPUT,
+                RoleEvent.ERROR: RoleState.ERROR,
+            },
+            RoleState.ERROR: {
+                RoleEvent.START: RoleState.WAITING_INPUT,
+            },
+            RoleState.DONE: {
+                RoleEvent.START: RoleState.WAITING_INPUT,
+            },
+        }
+
+    def can_transition(self, event: RoleEvent) -> bool:
+        """Return True if `event` is allowed in the current state."""
+        return event in self.transitions.get(self.state, {})
+
+    def transition(self, event: RoleEvent) -> bool:
+        """Apply `event`; return False (state untouched) when it is not allowed."""
+        if not self.can_transition(event):
+            return False
+        old_state = self.state
+        self.state = self.transitions[old_state][event]
+        self._on_state_change(old_state, self.state, event)
+        return True
+
+    def _on_state_change(self, old_state: RoleState, new_state: RoleState, event: RoleEvent):
+        """Log every successful transition."""
+        enhanced_print(self.role_name, self.role_name, f"Event: {event.name} (State: {old_state.name} -> {new_state.name})")
+
+    @abstractmethod
+    async def process_data(self, data: Any) -> Any:
+        """Process the current item; a None result keeps the machine in PROCESSING."""
+
+    @abstractmethod
+    async def get_input_data(self) -> Optional[Any]:
+        """Fetch the next input item; a None result keeps the machine in WAITING_INPUT."""
+
+    @abstractmethod
+    async def send_output_data(self, data: Any) -> bool:
+        """Deliver the processed item; a falsy result keeps the machine in WAITING_OUTPUT."""
+
+    async def step(self) -> bool:
+        """Advance the machine by at most one transition.
+
+        Returns:
+            False when the machine is DONE (or has just left ERROR for DONE);
+            True otherwise, whether or not the current phase made progress.
+        """
+        state = self.state
+        if state == RoleState.DONE:
+            return False  # End loop
+        if state == RoleState.ERROR:
+            # Error state - terminate directly, no retry
+            enhanced_print(self.role_name, None, f"Error state reached, terminating {self.role_name} state machine")
+            self.state = RoleState.DONE
+            return False
+        if state == RoleState.IDLE:
+            self.transition(RoleEvent.START)
+        elif state == RoleState.WAITING_INPUT:
+            data = await self.get_input_data()
+            if data is not None:
+                self.current_data = data
+                self.transition(RoleEvent.RECEIVE_DATA)
+        elif state == RoleState.PROCESSING:
+            result = await self.process_data(self.current_data)
+            if result is not None:
+                self.current_data = result
+                self.transition(RoleEvent.PROCESS_COMPLETE)
+        elif state == RoleState.WAITING_OUTPUT:
+            if await self.send_output_data(self.current_data):
+                self.current_data = None
+                self.transition(RoleEvent.SEND_DATA)
+        return True
+
+    async def run(self):
+        """Drive step() until the machine reaches DONE."""
+        enhanced_print(self.role_name, None, f"Starting {self.role_name} state machine")
+        while self.state != RoleState.DONE:
+            await self.step()
+            # Yield to the event loop: a step that makes no progress without
+            # suspending would otherwise starve the other roles' coroutines.
+            await asyncio.sleep(0)
+        enhanced_print(self.role_name, None, f"Stopped {self.role_name} state machine")
+
+    async def stop(self):
+        """Request shutdown by forcing the state to DONE."""
+        enhanced_print(self.role_name, None, f"Stopping {self.role_name} state machine")
+        self.state = RoleState.DONE
+
+
+# ============================================================================
+# Training flow management class
+# ============================================================================
+
+from .pipeline_utils import AsyncPipeline, PIPELINE_END_SIGNAL, PIPELINE_START_SINGLE, TransferMode
+from .utils import global_timing_collector
+
+
+class AsyncTrainingFlow:
+    """Build the async pipeline plus one state machine per role, and run them.
+
+    use_async_rl selects the transport: DIRECT_OBJECT_STORE when True,
+    RAY_QUEUE_COMPRESSED otherwise.
+    """
+
+    def __init__(self, trainer=None, use_async_rl=True):
+        """Create the pipeline and the role state machines.
+
+        Args:
+            trainer: Trainer passed to every role state machine; its config and
+                step counters are accessed once the pipeline is started.
+            use_async_rl: True for the direct-object-store transport, False for
+                the compressed Ray queue transport.
+        """
+        self.trainer = trainer
+        self.use_async_rl = use_async_rl
+
+        # Only the transfer mode differs between the two modes; both use a queue size of 5.
+        if use_async_rl:
+            transfer_mode = TransferMode.DIRECT_OBJECT_STORE
+            banner = "Using async RL mode pipeline with direct object store communication for maximum performance"
+        else:
+            transfer_mode = TransferMode.RAY_QUEUE_COMPRESSED
+            banner = "Using async mode pipeline with compression"
+        self.pipeline = AsyncPipeline(max_queue_size=5, transfer_mode=transfer_mode)
+        enhanced_print("AsyncTrainingFlow", None, banner)
+
+        # Create state machines
+        self.state_machines = self._create_state_machines()
+
+        # Performance statistics collector
+        self.timing_collector = global_timing_collector
+
+        enhanced_print("AsyncTrainingFlow", None,
+                     "Using resource lock mechanism for train/logp/ref_logp (shared cluster resources)")
+
+    def _create_state_machines(self):
+        """Create one state machine per pipeline role, keyed by role name."""
+        if self.use_async_rl:
+            banner = "Using async RL state machines for dual buffer and async param sync"
+        else:
+            banner = "Using async state machines (legacy mode - not recommended)"
+        enhanced_print("AsyncTrainingFlow", None, banner)
+
+        # Imported lazily: state_machine_impl itself imports from this module.
+        from .state_machine_impl import create_role_state_machine
+
+        role_names = ["dataloader", "rollout", "train", "generate", "reward", "logp", "ref_logp", "param_update"]
+        return {
+            role_name: create_role_state_machine(
+                role_name, self.pipeline, self.trainer,
+                use_async_rl=self.use_async_rl
+            )
+            for role_name in role_names
+        }
+
+    async def run_state_machine_pipeline(self):
+        """Run all role state machines concurrently until every one has finished.
+
+        On failure the error and traceback are printed, each state machine is
+        asked to stop, the timing summary is printed and the error is re-raised.
+
+        Raises:
+            Exception: the error raised by a role's run(), after the shutdown attempt.
+        """
+        print("\n" + "="*60)
+        print("STATE MACHINE PIPELINE TRAINING FLOW")
+        print("="*60)
+
+        pipeline_type = "Efficient"
+        if self.use_async_rl:
+            mode_type = "Async RL"
+            features = "🚀 Async RL, dual buffer, async param sync, 1.5-2x performance"
+        else:
+            mode_type = "Async (Legacy)"
+            features = "⚠️  Async mode (legacy), may have timing issues"
+
+        print(f"Pipeline Type: {pipeline_type}")
+        print(f"Mode: {mode_type}")
+        print(f"State Machines: {len(self.state_machines)}")
+        print(f"Features: {features}")
+
+        for role_name, sm in self.state_machines.items():
+            print(f"  - {role_name}: {type(sm).__name__}")
+
+        await self._init_before_pipeline()
+
+        try:
+            await asyncio.gather(*(sm.run() for sm in self.state_machines.values()))
+
+            print("State machine pipeline training completed!")
+            global_timing_collector.print_summary()
+
+            # Output param_update specific statistics, when that role exposes them
+            param_update_sm = self.state_machines.get("param_update")
+            if hasattr(param_update_sm, 'get_status_info'):
+                status = param_update_sm.get_status_info()
+                print(f"\nParam Update Stats: {status}")
+
+        except Exception as e:
+            print(f"Error in state machine pipeline: {e}")
+            import traceback
+            traceback.print_exc()
+
+            # NOTE(review): gather() leaves the other roles' tasks running after the first failure;
+            # the base stop() only sets state to DONE, so a role blocked in an await is not woken.
+            print("Attempting graceful shutdown...")
+            try:
+                for role_name, sm in self.state_machines.items():
+                    try:
+                        if hasattr(sm, 'stop'):
+                            await sm.stop()
+                    except Exception as stop_error:
+                        print(f"Error stopping {role_name} state machine: {stop_error}")
+
+                global_timing_collector.print_summary()
+            except Exception as shutdown_error:
+                print(f"Error during graceful shutdown: {shutdown_error}")
+
+            # Bare raise re-raises the active exception with its traceback untouched.
+            raise
+
+    async def _init_before_pipeline(self):
+        """Reset the trainer's dataloader/generate step counters and push the bootstrap signals."""
+        # Both counters are reset to 0 before any role starts.
+        self.trainer.dataloader_global_step = 0
+        self.trainer.generate_global_step = 0
+
+        # Start signal on the rollout -> dataloader link
+        await self.pipeline.push("rollout", "dataloader", PIPELINE_START_SINGLE)
+
+        # First param_update, ensure generate waits
+        enhanced_print("AsyncTrainingFlow", None, "Starting first param_update to ensure generate waits...")
+        await self.pipeline.push(src_role="train", dst_role="param_update", data=self.trainer.global_steps)
+
+        # If logp/ref_logp and train share resources, block push to logp/ref_logp, ensure no resource contention
+        if self.trainer.config.trainer.get("share_resource_between_train_logp_ref_logp", True):
+            await self.pipeline.push(src_role="train", dst_role="logp", data=self.trainer.global_steps)
+            await self.pipeline.push(src_role="train", dst_role="ref_logp", data=self.trainer.global_steps)
+            enhanced_print("train", None, f"Sent training completion signal for step {self.trainer.global_steps} to logp/ref_logp")
+
+
+    def get_pipeline_status(self) -> Dict[str, Any]:
+        """Return a snapshot of the pipeline mode, trainer step counters and role states."""
+        status = {
+            "pipeline_type": "efficient",
+            # NOTE(review): the non-async-RL mode is reported as "blocking" here but is
+            # printed as "Async (Legacy)" by run_state_machine_pipeline - confirm the label.
+            "mode": "async_rl" if self.use_async_rl else "blocking",
+            "use_async_rl": self.use_async_rl,
+            "state_machines": len(self.state_machines),
+            "trainer_steps": self.trainer.global_steps,
+            "trainer_dataloader_steps": self.trainer.dataloader_global_step,
+            "trainer_generate_steps": self.trainer.generate_global_step,
+            "state_machine_states": {},
+            "timing_stats": global_timing_collector.get_summary()
+        }
+
+        # Each role is reported by the value of its current state.
+        for role_name, sm in self.state_machines.items():
+            status["state_machine_states"][role_name] = sm.state.value
+
+        # Add param_update specific status
+        param_update_sm = self.state_machines.get("param_update")
+        if hasattr(param_update_sm, 'get_status_info'):
+            status["param_update_details"] = param_update_sm.get_status_info()
+
+        return status
diff --git a/verl/trainer/ppo/pipeline/state_machine_impl.py b/verl/trainer/ppo/pipeline/state_machine_impl.py
new file mode 100644
index 00000000000..216574c7053
--- /dev/null
+++ b/verl/trainer/ppo/pipeline/state_machine_impl.py
@@ -0,0 +1,1134 @@
+"""
+Role state machines for n-step off-policy Async-RL training.
+
+Each role enforces its dependencies on the other roles by blocking on the
+pipeline queues that connect them, which keeps the one-step or n-step
+off-policy training flow in the correct order.
+
+Key Features:
+- Strict dependencies: Ensures proper order of operations
+- State machine design: Clear separation of concerns, each task has its own loop state machine to handle its logic
+- N-step off-policy: Maintains correct training flow for RL algorithms
+"""
+
+import asyncio
+import time
+import torch
+import uuid
+from enum import Enum, auto
+from typing import Optional, Any, Dict, List
+from abc import ABC, abstractmethod
+from tqdm import tqdm
+from pprint import pprint
+
+from .pipeline_utils import AsyncPipeline, enhanced_print, PIPELINE_END_SIGNAL, PIPELINE_START_SINGLE, TransferMode
+from .state_machine import BaseRoleStateMachine, RoleState, RoleEvent
+from .utils import resource_lock, global_timing_collector, timing_decorator
+
+import ray
+from ray.util.queue import Queue
+import numpy as np
+from copy import deepcopy
+
+from verl.utils.metric import (
+    reduce_metrics,
+)
+from verl.trainer.ppo.reward import compute_reward, compute_reward_async
+from verl.trainer.ppo.ray_trainer import (
+    RayPPOTrainer,
+    RayClassWithInitArgs,
+    Role,
+    OmegaConf,
+    create_colocated_worker_cls,
+    compute_data_metrics,
+    compute_throughout_metrics,
+    compute_timing_metrics,
+    process_validation_metrics,
+    marked_timer,
+    compute_advantage,
+    compute_response_mask,
+    agg_loss,
+    AdvantageEstimator,
+    DataProto,
+    apply_kl_penalty,
+)
+
+
+class DataloaderStateMachine(BaseRoleStateMachine):
+    """Dataloader role: pulls batches from the trainer and feeds rollout and generate."""
+    # input:
+    #   rollout -> dataloader (start signal, pulled once)
+    # output:
+    #   dataloader -> rollout   (step, batch_dict)
+    #   dataloader -> generate  (step, gen_batch)
+    def __init__(self, pipeline, trainer):
+        super().__init__("dataloader", pipeline)
+        self.trainer = trainer
+        self.batch_iter = None
+        self.pipeline_start = None  # Store startup signal
+
+        # max_pending_size bounds the dataloader -> generate queue (see get_input_data).
+        self.prefetch_steps = trainer.config.trainer.get("dataloader_prefetch_steps", 10)
+        self.max_pending_size = self.prefetch_steps
+        enhanced_print("dataloader", None, f"Configured dataloader prefetch: {self.prefetch_steps} steps")
+
+    async def get_input_data(self) -> Optional[Any]:
+        """Wait for the start signal (until received) and for room in the generate queue.
+
+        Returns:
+            "START" when a batch may be loaded, or None when the signal pulled
+            from rollout was not the start signal (the step is then retried).
+        """
+        if not self.pipeline_start:
+            enhanced_print("dataloader", None, "Waiting for start signal from rollout")
+            signal = await self.pipeline.pull("rollout", "dataloader")
+            enhanced_print("dataloader", None, f"Received signal from rollout: {signal}")
+            if signal != PIPELINE_START_SINGLE:
+                # batch_iter is still None here, so process_data would fail on next(None).
+                enhanced_print("dataloader", None, "Signal is not the start signal, waiting again")
+                return None
+            self.pipeline_start = signal
+            enhanced_print("dataloader", None, "Pipeline started, Initializing batch iterator")
+            self.batch_iter = self.trainer.get_next_batch()
+
+        # Back-pressure: poll until the generate queue drops below the bound.
+        queue_size = self.pipeline.get_queue_size("dataloader", "generate")
+        if queue_size >= self.max_pending_size:
+            enhanced_print("dataloader", None, f"Queue full, waiting... size: {queue_size}")
+            while self.pipeline.get_queue_size("dataloader", "generate") >= self.max_pending_size:
+                await asyncio.sleep(0.1)  # Brief check interval
+            enhanced_print("dataloader", None, f"Queue has space, continuing... size: {self.pipeline.get_queue_size('dataloader', 'generate')}")
+
+        enhanced_print("dataloader", None, "Returning START")
+        return "START"
+
+    @timing_decorator("dataloader")
+    async def process_data(self, data: Any) -> Any:
+        """Fetch the next batch from the trainer's batch iterator.
+
+        Returns:
+            (step, gen_batch, batch_dict) for a regular batch, "END" when the
+            iterator yields the end signal, or None for unexpected input or an
+            invalid batch_dict.
+        """
+        if data != "START":
+            enhanced_print("dataloader", None, f"Unexpected data received: {data}, returning None")
+            return None
+
+        cur_global_steps, gen_batch, batch_dict = next(self.batch_iter)
+        if gen_batch == PIPELINE_END_SIGNAL:
+            enhanced_print("dataloader", None, "dataloader loop finished.")
+            return "END"
+
+        if not batch_dict or not isinstance(batch_dict, dict):
+            enhanced_print("dataloader", None, f"Invalid batch_dict from trainer: {batch_dict}")
+            return None
+
+        enhanced_print("dataloader", None, f"Returning batch for step {cur_global_steps}")
+        return (cur_global_steps, gen_batch, batch_dict)
+
+    async def send_output_data(self, data: Any) -> bool:
+        """Push a batch (or the END signal) to rollout and generate; False for any other data."""
+        if data == "END":
+            await self.pipeline.push("dataloader", "rollout", PIPELINE_END_SIGNAL)
+            await self.pipeline.push("dataloader", "generate", PIPELINE_END_SIGNAL)
+            enhanced_print("dataloader", None, "Sent END signals to rollout and generate")
+            return True
+        if isinstance(data, tuple):
+            cur_global_steps, gen_batch, batch_dict = data
+            await self.pipeline.push("dataloader", "rollout", (cur_global_steps, batch_dict))
+            await self.pipeline.push("dataloader", "generate", (cur_global_steps, gen_batch))
+            enhanced_print("dataloader", None, f"Sent batch for step {cur_global_steps} to rollout and generate")
+            return True
+        enhanced_print("dataloader", None, f"Unexpected data: {type(data)}")
+        return False
+
+
+class RolloutStateMachine(BaseRoleStateMachine):
+    """Rollout role: pairs each dataloader batch with its generate output and fans it out."""
+    # input:
+    #   dataloader -> rollout
+    #   generate -> rollout
+    # output:
+    #   rollout -> ref_logp
+    #   rollout -> logp
+    #   rollout -> reward
+    #   rollout -> train
+    def __init__(self, pipeline, trainer):
+        super().__init__("rollout", pipeline)
+        self.trainer = trainer
+        
+        # Derived from the rollout worker group's world size and the configured tensor-parallel size
+        rollout_wg = trainer.rollout_wg
+        self._total_engines = rollout_wg.world_size
+        tp_size = trainer.config.actor_rollout_ref.rollout.tensor_model_parallel_size
+        self._tp_rank_0_engines = self._total_engines // tp_size
+        
+        # Check if resources are shared, TODO: support separated train vs logp/ref_logp
+        self._share_resources = trainer.config.trainer.get("share_resource_between_train_logp_ref_logp", True)
+    
+    async def get_input_data(self) -> Optional[Any]:
+        """Pull one dataloader item, then the next generate item (both pulls are awaited)."""
+        # Returns "END" on the dataloader end signal, else (step, train_batch, gen_batch_output).
+        data = await self.pipeline.pull("dataloader", "rollout")
+
+        if data == PIPELINE_END_SIGNAL:
+            return "END"
+        # A regular item is (step, train_batch). The generate output is pulled next; its
+        # step is only logged below, not checked against the dataloader step.
+        cur_global_steps, train_batch = data
+
+        gen_step, gen_batch_output = await self.pipeline.pull("generate", "rollout")
+        enhanced_print("rollout", None, f"Received data of gen_step: {gen_step}, dataloader step: {cur_global_steps}")
+
+        return (cur_global_steps, train_batch, gen_batch_output)
+    
+    @timing_decorator("rollout")
+    async def process_data(self, data: Any) -> Any:
+        """Build the per-step training batch from the dataloader batch and the generate output."""
+        if data is None:
+            enhanced_print("rollout", None, "Received None data, waiting...")
+            return None
+        if data == "END":
+            return {"step": None, "batch_dict": None, "batch": None, "pipeline_signal": PIPELINE_END_SIGNAL}
+        
+        # Expect the 3-item (step, train_batch, gen_batch_output) value built by get_input_data
+        assert isinstance(data, (int, tuple, list)) and len(data) == 3, f"Invalid data format: {data}"
+        # NOTE(review): int passes the isinstance check above, but len() of an int raises TypeError
+        cur_global_steps, train_batch, gen_batch_output = data
+        batch_dict = train_batch
+        
+        metrics = {}
+        timing_raw = {}
+        
+        # Rebuild a DataProto from the raw batch dict
+        batch: DataProto = DataProto.from_single_dict(batch_dict)
+        # The returned value is unused here; presumably called for its effect on batch - TODO confirm
+        _gen_batch = self.trainer._pre_process_batch(batch)
+        
+        with marked_timer("step", timing_raw):
+            batch.non_tensor_batch["uid"] = np.array([str(uuid.uuid4()) for _ in range(len(batch.batch))], dtype=object)
+            # repeat to align with repeated responses in rollout
+            batch = batch.repeat(repeat_times=self.trainer.config.actor_rollout_ref.rollout.n, interleave=True)
+            
+            # If there's generate data, merge it
+            if gen_batch_output is not None:
+                batch = batch.union(gen_batch_output)
+
+            batch.batch["response_mask"] = compute_response_mask(batch)
+            # Balance the number of valid tokens across DP ranks.
+            # NOTE: This usually changes the order of data in the `batch`,
+            # which won't affect the advantage calculation (since it's based on uid),
+            # but might affect the loss calculation (due to the change of mini-batching).
+            # TODO: Decouple the DP balancing and mini-batching.
+            if self.trainer.config.trainer.balance_batch:
+                self.trainer._balance_batch(batch, metrics=metrics)
+
+            # compute global_valid tokens
+            batch.meta_info["global_token_num"] = torch.sum(batch.batch["attention_mask"], dim=-1).tolist()
+            
+            # Hand over to train loop
+        # NOTE(review): metrics and timing_raw filled above are not included in the returned dict
+        return {
+            "step": cur_global_steps,
+            "batch": batch,
+            "batch_dict": batch_dict,
+            "pipeline_signal": PIPELINE_START_SINGLE,
+        }
+
+    
+    async def send_output_data(self, data: Any) -> bool:
+        """Push the batch (or the end signal) to logp/ref_logp/reward when present, and to train."""
+        pipeline_signal = data["pipeline_signal"]
+        if pipeline_signal == PIPELINE_END_SIGNAL:
+            batch = PIPELINE_END_SIGNAL
+        else:
+            batch = data["batch"]
+        
+        # Optional roles receive (step, batch); each is pushed to only if it is in the pipeline
+        push_tasks = []
+        
+        if self.pipeline.is_in_pipeline("logp"):
+            push_tasks.append(self.pipeline.push("rollout", "logp", (data["step"], batch)))
+        
+        if self.pipeline.is_in_pipeline("ref_logp"):
+            push_tasks.append(self.pipeline.push("rollout", "ref_logp", (data["step"], batch)))
+        
+        if self.pipeline.is_in_pipeline("reward"):
+            push_tasks.append(self.pipeline.push("rollout", "reward", (data["step"], batch)))
+        
+        # train always receives the full result dict
+        push_tasks.append(self.pipeline.push("rollout", "train", data))
+        
+        # Execute all pushes concurrently
+        if push_tasks:
+            await asyncio.gather(*push_tasks)
+            enhanced_print("rollout", None, f"Successfully pushed to {len(push_tasks)} downstream")
+        
+        enhanced_print("rollout", None, f"Sent step {data['step']} to downstream")
+        return True
+
+
+class TrainStateMachine(BaseRoleStateMachine):
+    """Enhanced trainer state machine, fully aligned with ray_async_pipeline_trainer.py switching conditions"""
+    # input: 
+    #   rollout -> train
+    #   logp -> train
+    #   ref_logp -> train
+    #   reward -> train
+    # output:
+    #   train -> param_update
+    def __init__(self, pipeline, trainer):
+        super().__init__("train", pipeline)
+        self.trainer = trainer
+        # Lazy initialization, avoid calling asyncio.run in constructor
+        self._init_completed = False
+            
+    async def _init_before_train(self):
+
+        from omegaconf import OmegaConf
+
+        from verl.utils.tracking import Tracking
+
+        self.logger = Tracking(
+            project_name=self.trainer.config.trainer.project_name,
+            experiment_name=self.trainer.config.trainer.experiment_name,
+            default_backend=self.trainer.config.trainer.logger,
+            config=OmegaConf.to_container(self.trainer.config, resolve=True),
+        )
+
+        # load checkpoint before doing anything
+        self.trainer._load_checkpoint()
+
+        # TODO: support async-rl validation
+        # # perform validation before training
+        # # currently, we only support validation using the reward_function.
+        # if self.val_reward_fn is not None and self.trainer.config.trainer.get("val_before_train", True):
+        #     print(f"===== validation before training =====", flush=True)
+        #     val_metrics = self._validate()
+        #     assert val_metrics, f"{val_metrics=}"
+        #     pprint(f"Initial validation metrics: {val_metrics}")
+        #     self.logger.log(data=val_metrics, step=self.global_steps)
+        #     if self.trainer.config.trainer.get("val_only", False):
+        #         return
+
+        # add tqdm
+        self.progress_bar = tqdm(total=self.trainer.total_training_steps, initial=self.trainer.global_steps, desc="Training Progress")
+
+        # we start from step 1
+        self.trainer.global_steps += 1
+        self.last_val_metrics = None
+
+    async def get_input_data(self) -> Optional[Any]:
+        """Block getting batch data, ensure dependencies"""
+        
+        # Ensure initialization is complete
+        if not self._init_completed:
+            await self._init_before_train()
+            self._init_completed = True
+        
+        # Block waiting for rollout data
+        data = await self.pipeline.pull("rollout", "train")
+        
+        if data is None:
+            return None
+        if data["pipeline_signal"] == PIPELINE_END_SIGNAL:
+            return "END"
+        
+        # Block waiting for all dependency data
+        logp_result = await self.pipeline.pull("logp", "train")
+        ref_logp_result = await self.pipeline.pull("ref_logp", "train")
+        reward_result = await self.pipeline.pull(src_role="reward", dst_role="train")
+        
+        # Check if all data is received
+        if logp_result is None or ref_logp_result is None or reward_result is None:
+            return None
+            
+        logp_step, old_log_prob = logp_result
+        ref_logp_step, ref_log_prob = ref_logp_result
+        reward_step, reward_tensor, reward_extra_infos_dict = reward_result
+        
+        # Verify step consistency
+        assert logp_step == ref_logp_step == reward_step, f"Step mismatch: logp_step={logp_step}, ref_logp_step={ref_logp_step}, reward_step={reward_step}"
+        
+        enhanced_print("train", None, f"Successfully assembled data for step {logp_step}")
+        return (data, old_log_prob, ref_log_prob, reward_tensor, reward_extra_infos_dict)
+    
+    @timing_decorator("train")
+    async def process_data(self, data: Any) -> Any:
+        """Process training logic, fully aligned with ray_async_pipeline_trainer.py training flow"""
+        if data == "END":
+            return "END"
+        
+        data, old_log_prob, ref_log_prob, reward_tensor, reward_extra_infos_dict = data
+        batch = data["batch"]
+        cur_global_steps = data["step"]
+        enhanced_print("train", None, f"Processing step {cur_global_steps}, train_step: {self.trainer.global_steps}")
+        
+        metrics = {}
+        timing_raw = {}
+        
+        is_last_step = self.trainer.global_steps >= self.trainer.total_training_steps
+        
+        with marked_timer("step", timing_raw):
+            # Acquire resource lock (for training phase)
+            await resource_lock.acquire("train", self.trainer.global_steps)
+
+            # recompute old_log_probs
+            with marked_timer("old_log_prob", timing_raw):
+                entropys = old_log_prob.batch["entropys"]
+                response_masks = batch.batch["response_mask"]
+                loss_agg_mode = self.trainer.config.actor_rollout_ref.actor.loss_agg_mode
+                entropy_loss = agg_loss(loss_mat=entropys, loss_mask=response_masks, loss_agg_mode=loss_agg_mode)
+                old_log_prob_metrics = {"actor/entropy_loss": entropy_loss.detach().item()}
+                metrics.update(old_log_prob_metrics)
+                old_log_prob.batch.pop("entropys")
+                
+                # Merge old_log_prob to batch
+                batch = batch.union(old_log_prob)
+
+                if "rollout_log_probs" in batch.batch.keys():
+                    # TODO: we may want to add diff of probs too.
+                    rollout_old_log_probs = batch.batch["rollout_log_probs"]
+                    actor_old_log_probs = batch.batch["old_log_probs"]
+                    attention_mask = batch.batch["attention_mask"]
+                    responses = batch.batch["responses"]
+                    response_length = responses.size(1)
+                    response_mask = attention_mask[:, -response_length:]
+
+                    rollout_probs = torch.exp(rollout_old_log_probs)
+                    actor_probs = torch.exp(actor_old_log_probs)
+                    rollout_probs_diff = torch.abs(rollout_probs - actor_probs)
+                    rollout_probs_diff = torch.masked_select(rollout_probs_diff, response_mask.bool())
+                    rollout_probs_diff_max = torch.max(rollout_probs_diff)
+                    rollout_probs_diff_mean = torch.mean(rollout_probs_diff)
+                    rollout_probs_diff_std = torch.std(rollout_probs_diff)
+                    metrics.update(
+                        {
+                            "training/rollout_probs_diff_max": rollout_probs_diff_max.detach().item(),
+                            "training/rollout_probs_diff_mean": rollout_probs_diff_mean.detach().item(),
+                            "training/rollout_probs_diff_std": rollout_probs_diff_std.detach().item(),
+                        }
+                    )
+            
+            if self.trainer.use_reference_policy:
+                # compute reference log_prob
+                with marked_timer("ref", timing_raw):
+                    batch = batch.union(ref_log_prob)
+
+            # compute values; TODO support async-rl
+            if self.trainer.use_critic:
+                with marked_timer("values", timing_raw):
+                    values = self.critic_wg.compute_values(batch)
+                    batch = batch.union(values)
+
+            with marked_timer("adv", timing_raw):
+                batch.batch["token_level_scores"] = reward_tensor
+
+                if reward_extra_infos_dict:
+                    batch.non_tensor_batch.update({k: np.array(v) for k, v in reward_extra_infos_dict.items()})
+
+                # compute rewards. apply_kl_penalty if available
+                if self.trainer.config.algorithm.use_kl_in_reward:
+                    batch, kl_metrics = apply_kl_penalty(batch, kl_ctrl=self.kl_ctrl_in_reward, kl_penalty=self.trainer.config.algorithm.kl_penalty)
+                    metrics.update(kl_metrics)
+                else:
+                    batch.batch["token_level_rewards"] = batch.batch["token_level_scores"]
+
+                # compute advantages, executed on the driver process
+
+                norm_adv_by_std_in_grpo = self.trainer.config.algorithm.get("norm_adv_by_std_in_grpo", True)  # GRPO adv normalization factor
+
+                batch = compute_advantage(
+                    batch,
+                    adv_estimator=self.trainer.config.algorithm.adv_estimator,
+                    gamma=self.trainer.config.algorithm.gamma,
+                    lam=self.trainer.config.algorithm.lam,
+                    num_repeat=self.trainer.config.actor_rollout_ref.rollout.n,
+                    norm_adv_by_std_in_grpo=norm_adv_by_std_in_grpo,
+                    config=self.trainer.config.algorithm,
+                )
+
+            # update critic
+            if self.trainer.use_critic:
+                with marked_timer("update_critic", timing_raw):
+                    critic_output = self.critic_wg.update_critic(batch)
+                critic_output_metrics = reduce_metrics(critic_output.meta_info["metrics"])
+                metrics.update(critic_output_metrics)
+
+            # implement critic warmup
+            if self.trainer.config.trainer.critic_warmup <= self.trainer.global_steps:
+                # update actor
+                with marked_timer("update_actor", timing_raw):
+                    batch.meta_info["multi_turn"] = self.trainer.config.actor_rollout_ref.rollout.multi_turn.enable
+                    actor_output = self.trainer.actor_wg.update_actor(batch)
+
+                actor_output_metrics = reduce_metrics(actor_output.meta_info["metrics"])
+                metrics.update(actor_output_metrics)
+
+            # Log rollout generations if enabled
+            rollout_data_dir = self.trainer.config.trainer.get("rollout_data_dir", None)
+            if rollout_data_dir:
+                with marked_timer("dump_rollout_generations", timing_raw):
+                    print(batch.batch.keys())
+                    inputs = self.trainer.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
+                    outputs = self.trainer.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
+                    scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
+                    self.trainer._dump_generations(
+                        inputs=inputs,
+                        outputs=outputs,
+                        scores=scores,
+                        reward_extra_infos_dict=reward_extra_infos_dict,
+                        dump_path=rollout_data_dir,
+                    )
+
+            if self.trainer.config.trainer.save_freq > 0 and (is_last_step or self.trainer.global_steps % self.trainer.config.trainer.save_freq == 0):
+                with marked_timer("save_checkpoint", timing_raw):
+                    worker = self.trainer.actor_rollout_wg if "actor_rollout" in self.trainer.resource_pool_to_cls else self.trainer.actor_wg
+                    self.trainer._save_checkpoint(worker)
+
+        # Release resource lock
+        await resource_lock.release("train", self.trainer.global_steps)
+
+        # training metrics
+        metrics.update(
+            {
+                "training/global_step": self.trainer.global_steps,
+                "training/epoch": self.trainer.epoch,
+            }
+        )
+        # collect metrics
+        metrics.update(compute_data_metrics(batch=batch, use_critic=self.trainer.use_critic))
+        metrics.update(compute_timing_metrics(batch=batch, timing_raw=timing_raw))
+        # TODO: implement actual tflpo and theoretical tflpo
+        n_gpus = self.trainer.resource_pool_manager.get_n_gpus()
+        metrics.update(compute_throughout_metrics(batch=batch, timing_raw=timing_raw, n_gpus=n_gpus))
+
+        # TODO: make a canonical logger that supports various backend
+        self.logger.log(data=metrics, step=self.trainer.global_steps)
+
+        self.progress_bar.update(1)
+        self.trainer.global_steps += 1
+        if is_last_step:
+            pprint(f"Final validation metrics: {self.last_val_metrics}")
+            self.progress_bar.close()
+            return "END"
+        
+        # Return the current completed training step, representing model_steps
+        return self.trainer.global_steps - 1
+    
+    async def send_output_data(self, data: Any) -> bool:
+        """Send training results - block to ensure data transfer"""
+        if data == "END":
+            data = PIPELINE_END_SIGNAL
+
+        global_steps = data
+        # Block push to param_update, ensure parameter update
+        await self.pipeline.push(src_role="train", dst_role="param_update", data=global_steps)
+        enhanced_print("train", None, f"Sent training completion signal for step {global_steps} to param_update")
+
+        # If train/logp/ref_logp share resources, block push to logp/ref_logp to ensure no resource contention
+        if self.trainer.config.trainer.get("share_resource_between_train_logp_ref_logp", True):
+            await self.pipeline.push(src_role="train", dst_role="logp", data=global_steps)
+            await self.pipeline.push(src_role="train", dst_role="ref_logp", data=global_steps)
+            enhanced_print("train", None, f"Sent training completion signal for step {global_steps} to logp/ref_logp")
+        
+        return True
+
+
+class RewardStateMachine(BaseRoleStateMachine):
+    """Enhanced reward state machine, using blocking mode to ensure dependencies"""
+    # input: 
+    #   rollout -> reward
+    # output:
+    #   reward -> train
+    
+    def __init__(self, pipeline, trainer):
+        super().__init__("reward", pipeline)
+        self.trainer = trainer
+        
+    async def get_input_data(self) -> Optional[Any]:
+        """Block getting reward calculation data, ensure dependencies"""
+        try:
+            # Block waiting for data, no timeout set
+            reward_data = await self.pipeline.pull("rollout", "reward")
+            if reward_data is None:
+                enhanced_print("reward", None, "Received None from rollout, waiting...")
+                return None
+            return reward_data
+        except Exception as e:
+            enhanced_print("reward", None, f"Error in get_input_data: {e}")
+            return None
+    
+    @timing_decorator("reward")
+    async def process_data(self, data: Any) -> Any:
+        """Process reward calculation logic - block execution to ensure correctness"""
+        if data is None:
+            enhanced_print("reward", None, "Received None data, waiting...")
+            return None
+        
+        step, batch = data
+        if batch == PIPELINE_END_SIGNAL:
+            return "END"
+        enhanced_print("reward", None, f"Computing reward for step {step}")
+        
+        # Initialize variables
+        reward_tensor = None
+        reward_extra_infos_dict = {}
+        
+        # Block execute reward calculation
+        if self.trainer.use_rm:
+            reward_tensor = self.trainer.rm_wg.compute_rm_score(batch)
+
+        if self.trainer.config.reward_model.launch_reward_fn_async:
+            future_reward = compute_reward_async.remote(batch, self.trainer.config, self.trainer.tokenizer)
+            # Need to wait for future_reward to complete, but now use sync version first
+            enhanced_print("reward", None, "Using async reward computation - converting to sync")
+            # reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.trainer.reward_fn)
+        else:
+            reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.trainer.reward_fn)
+
+        return (step, reward_tensor, reward_extra_infos_dict)
+    
+    async def send_output_data(self, data: Any) -> bool:
+        """Send reward results - block to ensure data transfer"""
+        if data is None:
+            return False
+        if data == "END":
+            # no need to send to train queue
+            return True
+        step, reward_tensor, reward_extra_infos_dict = data
+        # Block send results, ensure train can receive data
+        await self.pipeline.push("reward", "train", (step, reward_tensor, reward_extra_infos_dict))
+        enhanced_print("reward", None, f"Sent reward result for step {step}")
+        return True
+
+
+class LogPStateMachine(BaseRoleStateMachine):
+    """Enhanced LogP state machine, using blocking mode to ensure dependencies"""
+    # input: 
+    #   rollout -> logp
+    # output:
+    #   logp -> train
+    
+    def __init__(self, pipeline, trainer):
+        super().__init__("logp", pipeline)
+        self.trainer = trainer
+        
+    async def get_input_data(self) -> Optional[Any]:
+        """Block getting LogP calculation data, ensure dependencies"""
+        try:
+            # Block waiting for data, no timeout set
+            batch = await self.pipeline.pull("rollout", "logp")
+
+            if batch is None:
+                enhanced_print("logp", None, "Received None from rollout, waiting...")
+                return None
+            
+            if batch == PIPELINE_END_SIGNAL:
+                return "END"
+            
+            # If logp/ref_logp and train share resources, wait for train's previous step to complete
+            if self.trainer.config.trainer.get("share_resource_between_train_logp_ref_logp", True):
+                enhanced_print("logp", None, f"Waiting for train to complete")
+                await self.pipeline.pull("train", "logp")
+                
+                enhanced_print("logp", None, f"Train step completed, continuing with logp")
+            return batch
+        except Exception as ex:
+            enhanced_print("logp", None, f"Error in get_input_data: {ex}")
+            return None
+    
+    @timing_decorator("logp")
+    async def process_data(self, data: Any) -> Any:
+        """Process LogP calculation logic - block execution to ensure correctness"""
+        if data is None:
+            enhanced_print("logp", None, "Received None data, waiting...")
+            return None
+        
+        if data == "END":
+            return "END"
+        step, batch = data
+        if batch == PIPELINE_END_SIGNAL:
+            return "END"
+        enhanced_print("logp", None, f"Computing logp for step {step}")
+        
+        # Acquire resource lock, pass step parameter
+        await resource_lock.acquire("logp", step)
+        
+        # Block execute LogP calculation (direct call, not using asyncio.to_thread)
+        enhanced_print("logp", None, f"Starting LogP computation for step {step}")
+        old_log_prob = self.trainer.actor_wg.compute_log_prob(batch)
+        enhanced_print("logp", None, f"LogP computation finished for step {step}")
+        
+        # Release resource lock
+        await resource_lock.release("logp")
+        
+        return (step, old_log_prob)
+    
+    async def send_output_data(self, data: Any) -> bool:
+        """Send LogP results - block to ensure data transfer"""
+        if data is None:
+            return False
+        if data == "END":
+            # no need to send to train queue
+            return True
+        
+        step, logp_result = data
+        # Block send results, ensure train can receive data
+        await self.pipeline.push("logp", "train", (step, logp_result))
+        enhanced_print("logp", None, f"Sent logp result for step {step}")
+
+        if self.trainer.config.trainer.get("share_resource_between_train_logp_ref_logp", True):
+            await self.pipeline.push("logp", "ref_logp", step)
+        return True
+
+
+class RefLogPStateMachine(BaseRoleStateMachine):
+    """Enhanced reference LogP state machine, using blocking mode to ensure dependencies"""
+    # input: 
+    #   rollout -> ref_logp
+    # output:
+    #   ref_logp -> train
+    def __init__(self, pipeline, trainer):
+        super().__init__("ref_logp", pipeline)
+        self.trainer = trainer
+        
+    async def get_input_data(self) -> Optional[Any]:
+        """Block getting reference LogP calculation data, ensure dependencies"""
+        try:
+            # Block waiting for data, no timeout set
+            batch = await self.pipeline.pull("rollout", "ref_logp")
+            if batch == PIPELINE_END_SIGNAL:
+                return "END"
+
+            if batch is None:
+                enhanced_print("ref_logp", None, "Received None from rollout, waiting...")
+                return None
+            
+            if self.trainer.config.trainer.get("share_resource_between_train_logp_ref_logp", True):
+                enhanced_print("ref_logp", None, f"Waiting for train to complete")
+                await self.pipeline.pull("train", "ref_logp")
+                await self.pipeline.pull("logp", "ref_logp")
+                enhanced_print("ref_logp", None, f"Train step completed, continuing with ref_logp")
+            return batch
+        except Exception as ex:
+            enhanced_print("ref_logp", None, f"Error in get_input_data: {ex}")
+            return None
+    
+    @timing_decorator("ref_logp")
+    async def process_data(self, data: Any) -> Any:
+        """Process reference LogP calculation logic - block execution to ensure correctness"""
+        if data is None:
+            enhanced_print("ref_logp", None, "Received None data, waiting...")
+            return None
+        
+        if data == "END":
+            return "END"
+        
+        step, batch = data
+        if batch == PIPELINE_END_SIGNAL:
+            return "END"
+        enhanced_print("ref_logp", None, f"Computing ref_logp for step {step}")
+
+        # Acquire resource lock, pass step parameter
+        await resource_lock.acquire("ref_logp", step)
+        
+        # Block execute reference LogP calculation (direct call, not using asyncio.to_thread)
+        enhanced_print("ref_logp", None, f"Starting Ref LogP computation for step {step}")
+        if not self.trainer.ref_in_actor:
+            ref_log_prob = self.trainer.ref_policy_wg.compute_ref_log_prob(batch)
+        else:
+            ref_log_prob = self.trainer.actor_wg.compute_ref_log_prob(batch)
+        enhanced_print("ref_logp", None, f"Ref LogP computation finished for step {step}")
+        
+        # Release resource lock
+        await resource_lock.release("ref_logp")
+        
+        return (step, ref_log_prob)
+    
+    async def send_output_data(self, data: Any) -> bool:
+        """Send reference LogP results - block to ensure data transfer"""
+        if data is None:
+            return False
+        if data == "END":
+            # no need to send to train queue
+            return True
+        
+        step, ref_logp_result = data
+        # Block send results, ensure train can receive data
+        await self.pipeline.push("ref_logp", "train", (step, ref_logp_result))
+        enhanced_print("ref_logp", None, f"Sent ref_logp result for step {step}")
+        return True
+
+
+class ParamUpdateStateMachine(BaseRoleStateMachine):
+    """asyncRLparameterupdatestate machine - 使用param_update.py中的方法进行asyncsynchronization"""
+    
+    def __init__(self, pipeline, trainer):
+        super().__init__("param_update", pipeline)
+        self._debug = False  # sync update by all params
+        self.trainer = trainer
+        self.stats = {
+            "updates": 0,
+            "async_updates": 0,
+            "sync_updates": 0,
+            "total_time": 0.0,
+            "avg_time": 0.0,
+            "min_time": float('inf'),
+            "max_time": 0.0
+        }
+        
+        # 检查trainer是否有param_update_manager
+        rollout_wg = self.trainer.rollout_wg
+        actor_wg = self.trainer.actor_wg
+        self.has_param_update_manager = hasattr(actor_wg, 'async_param_update')
+        
+        if self.has_param_update_manager:
+            enhanced_print("AsyncRLParamUpdate", None, "Using param_update_manager for async parameter synchronization")
+        else:
+            enhanced_print("AsyncRLParamUpdate", None, "param_update_manager not available, falling back to sync update")
+    
+    async def get_input_data(self) -> Optional[Any]:
+        """gettingparameterupdate请求"""
+        try:
+            data = await self.pipeline.pull("train", "param_update")
+            if data == PIPELINE_END_SIGNAL:
+                return "END"
+            elif data is None:
+                enhanced_print("param_update", None, "Received None from train, waiting...")
+                return None
+            
+            enhanced_print("param_update", None, f"Received param update request for step {data}")
+            return data
+            
+        except Exception as e:
+            enhanced_print("param_update", None, f"Error in get_input_data: {e}")
+            return None
+    
+    @timing_decorator("param_update")
+    async def process_data(self, data: Any) -> Any:
+        """Processparameterupdate请求 - 后台线程execute"""
+        if data == "END":
+            return "END"
+        elif data is None:
+            enhanced_print("param_update", None, "Received None data, waiting...")
+            return None
+        
+        global_steps = data
+
+        enhanced_print("param_update", None, f"Starting param update for step {global_steps}")
+        
+        # 记录开始时间
+        start_time = time.time()
+        # 后续step在后台线程中execute，不阻塞主流程
+        if self.has_param_update_manager:
+            # 启动后台asyncparameterupdate任务
+            param_update_task = asyncio.create_task(
+                self._perform_async_param_update_background(global_steps)
+            )
+            self.stats["async_updates"] += 1
+            enhanced_print("param_update", None, f"Async param update task created for step {global_steps}")
+        else:
+            # 启动后台synchronizationparameterupdate任务
+            param_update_task = asyncio.create_task(
+                asyncio.to_thread(self._perform_sync_param_update_background, global_steps)
+            )
+            self.stats["sync_updates"] += 1
+            enhanced_print("param_update", None, f"Sync param update task created for step {global_steps}")
+        
+        # 立即return，不waitingparameterupdatecompleted
+        task_creation_time = time.time() - start_time
+        
+        enhanced_print("param_update", None, 
+                        f"Param update task created for step {global_steps} in {task_creation_time:.3f}s (background execution)")
+        
+        # return任务对象，让send_output_datawaitingcompleted
+        return (global_steps, param_update_task)
+
+    async def _perform_async_param_update_background(self, global_steps: int) -> bool:
+        """后台asyncparameterupdate - 后续step使用"""
+        enhanced_print("param_update", None, f"Background async param update started for step {global_steps}")
+        
+        # getting锁（在后台线程中）
+        await resource_lock.acquire("param_update", global_steps)
+
+        start_time = time.time()
+        
+        # 使用param_update_manager的async_param_update方法
+        self.trainer.actor_wg.async_param_update()
+        self.trainer.rollout_wg.async_param_update()
+        
+        # waitingsendcompleted
+        self.trainer.actor_wg.wait_for_send_complete()
+        
+        update_time = time.time() - start_time
+        
+        # update统计
+        self.stats["updates"] += 1
+        self.stats["total_time"] += update_time
+        self.stats["avg_time"] = self.stats["total_time"] / self.stats["updates"]
+        self.stats["min_time"] = min(self.stats["min_time"], update_time)
+        self.stats["max_time"] = max(self.stats["max_time"], update_time)
+        
+        enhanced_print("param_update", None, 
+                        f"Background async param update completed for step {global_steps} in {update_time:.3f}s")
+        
+        # 释放锁
+        await resource_lock.release("param_update", global_steps)
+
+        return True
+            
+
+    async def _perform_sync_param_update_background(self, global_steps: int) -> bool:
+        enhanced_print("param_update", None, f"Background sync param update started for step {global_steps}")
+        
+        await resource_lock.acquire("param_update", global_steps)
+        
+        start_time = time.time()
+        
+        def sync_param_update():
+            enhanced_print("param_update", None, f"Syncing actor parameters for step {global_steps}...")
+            actor_result = self.trainer.actor_wg.sync_per_tensor_generator()
+            
+            enhanced_print("param_update", None, f"Syncing rollout parameters for step {global_steps}...")
+            rollout_result = self.trainer.rollout_wg.sync_per_tensor_generator()
+            
+            if hasattr(rollout_result, '__class__') and 'ObjectRef' in str(type(rollout_result)):
+                ray.get(rollout_result)
+            
+            enhanced_print("param_update", None, f"Parameter sync completed for step {global_steps}")
+            return True
+        
+        success = await asyncio.to_thread(sync_param_update)
+        
+        update_time = time.time() - start_time
+        
+        self.stats["updates"] += 1
+        self.stats["total_time"] += update_time
+        self.stats["avg_time"] = self.stats["total_time"] / self.stats["updates"]
+        self.stats["min_time"] = min(self.stats["min_time"], update_time)
+        self.stats["max_time"] = max(self.stats["max_time"], update_time)
+        
+        enhanced_print("param_update", None, 
+                        f"Background sync param update completed for step {global_steps} in {update_time:.3f}s")
+        
+        await resource_lock.release("param_update", global_steps)
+        
+        return success
+        
+
+    async def send_output_data(self, data: Any) -> bool:
+        """Wait for the background param update (if any), then push the completion signal to "generate"."""
+        if data == "END":
+            enhanced_print("param_update", None, "Async param update completed, END signal processed")
+            await self.pipeline.push("param_update", "generate", PIPELINE_END_SIGNAL)
+            return True
+        elif data is None:
+            return False
+        
+        # A 2-tuple is treated as (global_steps, awaitable background param-update task).
+        if isinstance(data, tuple) and len(data) == 2:
+            global_steps, param_update_task = data
+            enhanced_print("param_update", None, f"Waiting for background param update task to complete for step {global_steps}")
+            
+            # Wait here until the background task has finished.
+            success = await param_update_task
+            if success:
+                enhanced_print("param_update", None, f"Background param update completed for step {global_steps}")
+            else:
+                enhanced_print("param_update", None, f"Background param update failed for step {global_steps}")
+        else:
+            # First-step case: data is the global_steps value itself.
+            global_steps = data
+        
+        # Single place where the completion signal is pushed (also reached when success is falsy).
+        enhanced_print("param_update", None, f"Sending completion signal to generate for step {global_steps}")
+        await self.pipeline.push("param_update", "generate", global_steps)
+        enhanced_print("param_update", None, f"Sent completion signal to generate for step {global_steps}")
+        return True
+    
+    def get_status_info(self) -> Dict[str, Any]:  # status snapshot for the param_update role
+        async_rl_stats = {}  # stays empty unless the trainer exposes a param_update_manager
+        if self.has_param_update_manager and hasattr(self.trainer, 'param_update_manager'):
+            async_rl_stats = self.trainer.param_update_manager.get_async_rl_stats()
+        # "stats" is returned as self.stats.copy(), not as the live object.
+        return {
+            "stats": self.stats.copy(),
+            "type": "async_param_update",
+            "has_param_update_manager": self.has_param_update_manager,
+            "async_rl_stats": async_rl_stats,
+            "description": "Async param update with param_update_manager"
+        }
+
+
+class GenerateStateMachine(BaseRoleStateMachine):
+    """State machine for the "generate" role of the async RL pipeline."""
+    
+    def __init__(self, pipeline, trainer):
+        super().__init__("generate", pipeline)
+        self.trainer = trainer
+        self.first_generation = True
+        
+        # Off-policy window: how many steps generation may run ahead of the last param_update (default 3).
+        self.generate_ahead_steps = trainer.config.trainer.get("generate_ahead_steps", 3)
+        self.last_param_update_step = 0
+
+        enhanced_print("generate", None, f"Configured generate ahead: {self.generate_ahead_steps} steps")
+
+    @timing_decorator("generate")
+    async def process_data(self, data: Any) -> Any:
+        """Start generation for one (step, gen_batch) item in a worker thread; return (step, task)."""
+        if data == "END":
+            return "END"
+        elif data is None:
+            enhanced_print("generate", None, "Received None data, waiting...")
+            return None
+        
+        step, gen_batch = data
+        enhanced_print("generate", None, f"Starting generation task for step {step}")
+        
+        generation_task = asyncio.create_task(
+            asyncio.to_thread(self._generate_sync, gen_batch, step)
+        )
+        
+        # Advance the trainer-level generate step counter (done when the task is created, not when it ends).
+        self.trainer.generate_global_step += 1
+        
+        enhanced_print("generate", None, f"Generation task created for step {step}")
+        
+        return (step, generation_task)
+
+    async def send_output_data(self, data: Any) -> bool:
+        """Await the generation task from process_data and push (step, gen_batch_output) to "rollout"."""
+        if data == "END":
+            enhanced_print("generate", None, "Sending END signal to rollout")
+            await self.pipeline.push("generate", "rollout", PIPELINE_END_SIGNAL)
+            return True
+        elif data is None:
+            return False
+        
+        if isinstance(data, tuple) and len(data) == 2:
+            step, generation_task = data
+            enhanced_print("generate", None, f"Waiting for background generation task to complete for step {step}")
+            
+            gen_batch_output = await generation_task
+            if gen_batch_output is None:
+                raise Exception(f"Generation failed for step {step}")
+            
+            enhanced_print("generate", None, f"Background generation completed for step {step}")
+
+        else:
+            step, gen_batch_output = data  # non-tuple input is unpacked directly as (step, output)
+        
+        enhanced_print("generate", None, f"Sending generation result to rollout for step {step}")
+        await self.pipeline.push("generate", "rollout", (step, gen_batch_output))
+        enhanced_print("generate", None, f"Generation result sent to rollout for step {step}")
+        
+        return True
+    
+    async def get_input_data(self) -> Optional[Any]:
+        """Return the next (step, gen_batch) from the dataloader, "END", or None; throttled by param_update."""
+        # The first generation must wait for the initial param_update to complete.
+        if self.first_generation:
+            enhanced_print("generate", None, "First generation, waiting for initial param_update to complete...")
+            param_update_signal = await self.pipeline.pull("param_update", "generate")
+            
+            if param_update_signal == PIPELINE_END_SIGNAL:
+                enhanced_print("generate", None, "Received END signal from param_update")
+                return "END"
+            elif param_update_signal is None:
+                return None
+            
+            self.last_param_update_step = param_update_signal
+            enhanced_print("generate", None, f"First generation: received param_update completion signal for step {param_update_signal}")
+            self.first_generation = False
+        
+        # Wait for the next item from the dataloader.
+        data = await self.pipeline.pull("dataloader", "generate")
+        
+        if data == PIPELINE_END_SIGNAL:
+            enhanced_print("generate", None, "Received END signal from dataloader")
+            return "END"
+        elif data is None:
+            return None
+        
+        if not isinstance(data, (tuple, list)) or len(data) != 2:
+            enhanced_print("generate", None, f"Invalid data format: {data}")
+            return None
+            
+        step, gen_batch = data
+        
+        # Check how far this step is ahead of the last completed param_update.
+        # If it exceeds generate_ahead_steps, block until further param_update signals arrive.
+        while step > self.last_param_update_step + self.generate_ahead_steps:
+            enhanced_print("generate", None, f"Step {step} is too far ahead of param_update {self.last_param_update_step}, waiting for next param_update...")
+            param_update_signal = await self.pipeline.pull("param_update", "generate")
+            
+            if param_update_signal == PIPELINE_END_SIGNAL:
+                enhanced_print("generate", None, "Received END signal from param_update while waiting")
+                return "END"
+            elif param_update_signal is None:
+                return None  # NOTE(review): the (step, gen_batch) pulled above is dropped here; confirm pull() cannot return None
+            
+            self.last_param_update_step = param_update_signal
+            enhanced_print("generate", None, f"Updated param_update step to {param_update_signal}")
+        
+        enhanced_print("generate", None, f"Got generation task for step {step}")
+        return (step, gen_batch)
+    
+    def _generate_sync(self, gen_batch, step: int):
+        """Blocking generation call; process_data runs it through asyncio.to_thread."""
+        enhanced_print("generate", None, f"Background generation started for step {step}")
+        
+        start_time = time.time()
+        
+        # Run generation on the trainer's rollout worker group.
+        wg = self.trainer.rollout_wg
+        gen_batch_output = wg.generate_sequences_sperated(gen_batch)
+        
+        generation_time = time.time() - start_time
+        
+        enhanced_print("generate", None, f"Background generation completed for step {step} in {generation_time:.3f}s")
+        
+        return gen_batch_output
+
+
+    def get_status_info(self) -> Dict[str, Any]:
+        """Return a fixed status dictionary for this role ("async_rl_stats" is always empty here)."""
+        async_rl_stats = {}
+        return {
+            "type": "async_rl_generate",
+            "has_async_rl_support": True,
+            "async_rl_stats": async_rl_stats,
+            "description": "Async RL generate with interruptible generation"
+        }
+
+
+def create_role_state_machine(role_name: str, pipeline, trainer, use_async_rl: bool = False) -> BaseRoleStateMachine:
+    """
+    Factory: instantiate the state machine class registered for ``role_name``.
+    
+    Args:
+        role_name: role name, e.g., "dataloader", "rollout", "reward", "param_update", "generate", "logp", "ref_logp", "train"
+        pipeline: pipeline instance
+        trainer: trainer instance
+        use_async_rl: not read anywhere in this function; the log line below is emitted regardless (default False)
+    """
+    state_machines = {
+        "dataloader": DataloaderStateMachine,
+        "rollout": RolloutStateMachine,
+        "reward": RewardStateMachine,
+        "param_update": ParamUpdateStateMachine,
+        "generate": GenerateStateMachine,
+        "logp": LogPStateMachine,
+        "ref_logp": RefLogPStateMachine,
+        "train": TrainStateMachine,
+    }
+    enhanced_print("create_role_state_machine", None, 
+                 f"Creating {role_name} state machine with async RL optimizations (dual buffer + interruptible generation)")
+    # Unknown roles raise ValueError; the log line above has already been printed by then.
+    if role_name in state_machines:
+        return state_machines[role_name](pipeline, trainer)
+    else:
+        raise ValueError(f"Unknown role name: {role_name}")
diff --git a/verl/trainer/ppo/pipeline/utils.py b/verl/trainer/ppo/pipeline/utils.py
new file mode 100644
index 00000000000..0b8ec1d5134
--- /dev/null
+++ b/verl/trainer/ppo/pipeline/utils.py
@@ -0,0 +1,181 @@
+"""
+Pipeline utilities and common tools for state machines.
+
+This module contains shared utilities, decorators, and helper classes
+used across different state machine implementations.
+"""
+
+import asyncio
+import time
+from typing import Dict, Any, Optional
+from enum import Enum, auto
+
+
+class ResourceLock:
+    """Named-owner resource lock shared by pipeline roles (e.g. train/logp/ref_logp/param_update)."""
+    
+    def __init__(self):
+        self._lock = asyncio.Lock()  # serializes waiters inside acquire(); ownership itself is _current_owner
+        self._current_owner = None  # owner name currently holding the resource, or None when free
+        self._waiting_queue = []  # owner names that have requested the resource but not yet obtained it
+        self._train_completed_steps = set()  # Record completed train steps
+    
+    async def acquire(self, owner_name: str, step: int = None) -> bool:
+        """Wait until the resource is free, make ``owner_name`` the owner, return True (``step`` is not used)."""
+        if self._current_owner == owner_name:
+            return True  # Already owner
+        
+        # Add to waiting queue
+        if owner_name not in self._waiting_queue:
+            self._waiting_queue.append(owner_name)
+        
+        async with self._lock:
+            # Poll once per second until the resource is free; _lock is held while polling.
+            while self._current_owner is not None and self._current_owner != owner_name:
+                from .pipeline_utils import enhanced_print
+                enhanced_print("ResourceLock", None, f"{owner_name} waiting for resource, current owner: {self._current_owner}")
+                await asyncio.sleep(1)
+            
+            # Take ownership; it persists after _lock is released at the end of this block.
+            self._current_owner = owner_name
+            if owner_name in self._waiting_queue:
+                self._waiting_queue.remove(owner_name)
+            from .pipeline_utils import enhanced_print
+            enhanced_print("ResourceLock", None, f"{owner_name} acquired resource lock")
+            return True
+    
+    async def release(self, owner_name: str, step: int = None):
+        """Release the resource if ``owner_name`` is the current owner; otherwise only log a warning."""
+        if self._current_owner == owner_name:
+            self._current_owner = None
+            
+            # If train completed, record step
+            if owner_name == "train" and step is not None:
+                self._train_completed_steps.add(step)
+                from .pipeline_utils import enhanced_print
+                enhanced_print("ResourceLock", None, f"Train step {step} completed, available steps: {sorted(self._train_completed_steps)}")
+            
+            from .pipeline_utils import enhanced_print
+            enhanced_print("ResourceLock", None, f"{owner_name} released resource lock")
+            if self._waiting_queue:
+                enhanced_print("ResourceLock", None, f"Next in queue: {self._waiting_queue[0]}")
+        else:
+            from .pipeline_utils import enhanced_print
+            enhanced_print("ResourceLock", None, f"Warning: {owner_name} tried to release lock owned by {self._current_owner}")
+    
+    def get_status(self) -> Dict[str, Any]:
+        """Return the current owner, a copy of the waiting queue and the sorted completed train steps."""
+        return {
+            "current_owner": self._current_owner,
+            "waiting_queue": self._waiting_queue.copy(),
+            "train_completed_steps": sorted(self._train_completed_steps)
+        }
+
+
+class TimingStatsCollector:
+    """Collects per-role timing statistics (total, count, avg, min, max, last 10 durations)."""
+    
+    def __init__(self):
+        self.stats = {}  # role_name -> dict of aggregated timing fields (created lazily in record_timing)
+        self.step_count = 0  # not updated by any method of this class
+    
+    def record_timing(self, role_name: str, step: Any, duration: float):
+        """Add one ``duration`` (seconds) for ``role_name``; ``step`` is only used in the log message."""
+        if role_name not in self.stats:
+            self.stats[role_name] = {
+                'total_time': 0.0,
+                'count': 0,
+                'avg_time': 0.0,
+                'min_time': float('inf'),
+                'max_time': 0.0,
+                'recent_times': []
+            }
+        
+        stats = self.stats[role_name]
+        stats['total_time'] += duration
+        stats['count'] += 1
+        stats['avg_time'] = stats['total_time'] / stats['count']
+        stats['min_time'] = min(stats['min_time'], duration)
+        stats['max_time'] = max(stats['max_time'], duration)
+        
+        # Keep only the 10 most recent durations (oldest is dropped first).
+        stats['recent_times'].append(duration)
+        if len(stats['recent_times']) > 10:
+            stats['recent_times'].pop(0)
+        
+        # Print real-time statistics, but reduce frequency
+        if stats['count'] % 10 == 0 or duration > 1.0:  # every 10th record, or any call slower than 1 second
+            from .pipeline_utils import enhanced_print
+            enhanced_print(role_name, None, f"Step {step}: process_data took {duration:.2f}s "
+                  f"(avg: {stats['avg_time']:.2f}s, min: {stats['min_time']:.2f}s, max: {stats['max_time']:.2f}s)")
+    
+    def get_summary(self) -> Dict[str, Any]:
+        """Return, per role, avg/min/max time, total count and the average of the recent durations."""
+        summary = {}
+        for role_name, stats in self.stats.items():
+            summary[role_name] = {
+                'avg_time': stats['avg_time'],
+                'min_time': stats['min_time'],
+                'max_time': stats['max_time'],
+                'total_count': stats['count'],
+                'recent_avg': sum(stats['recent_times']) / len(stats['recent_times']) if stats['recent_times'] else 0.0
+            }
+        return summary
+    
+    def print_summary(self):
+        """Print one formatted line of statistics per role to stdout."""
+        print("\n" + "="*80)
+        print("PROCESS_DATA TIMING SUMMARY")
+        print("="*80)
+        for role_name, stats in self.stats.items():
+            recent_avg = sum(stats['recent_times']) / len(stats['recent_times']) if stats['recent_times'] else 0.0
+            print(f"{role_name:15} | "
+                  f"Count: {stats['count']:4d} | "
+                  f"Avg: {stats['avg_time']:6.3f}s | "
+                  f"Min: {stats['min_time']:6.3f}s | "
+                  f"Max: {stats['max_time']:6.3f}s | "
+                  f"Recent: {recent_avg:6.3f}s")
+        print("="*80)
+
+
+def timing_decorator(role_name: str):
+    """Decorator factory: time an async ``process_data(self, data)`` call and record the duration.
+
+    Recording happens in ``finally`` (failed calls are timed too) and goes to ``self.timing_collector``
+    when the instance has one, otherwise to the module-level ``global_timing_collector``.
+    """
+    import functools  # local import: this module does not import functools at top level
+    def decorator(func):
+        @functools.wraps(func)  # keep the wrapped coroutine's __name__/__doc__ for logs and debugging
+        async def wrapper(self, data: Any) -> Any:
+            start_time = time.perf_counter()  # monotonic clock, unaffected by wall-clock adjustments
+            try:
+                return await func(self, data)
+            finally:
+                duration = time.perf_counter() - start_time
+                # Pick the step value reported alongside the duration.
+                step_info = "N/A"
+                if hasattr(self, 'trainer'):
+                    if role_name == "dataloader":
+                        step_info = self.trainer.dataloader_global_step
+                    elif role_name == "generate":
+                        step_info = self.trainer.generate_global_step
+                    else:
+                        # global step has been updated in the trainer
+                        step_info = self.trainer.global_steps - 1
+                elif isinstance(data, tuple) and len(data) > 0:
+                    step_info = data[0]
+                elif isinstance(data, dict) and 'step' in data:
+                    step_info = data['step']
+                # Prefer a per-instance collector; fall back to the module-level one.
+                if hasattr(self, 'timing_collector'):
+                    self.timing_collector.record_timing(role_name, step_info, duration)
+                else:
+                    global_timing_collector.record_timing(role_name, step_info, duration)
+        return wrapper
+    return decorator
+
+
+# Module-level singleton instances, created once when this module is imported
+resource_lock = ResourceLock()
+global_timing_collector = TimingStatsCollector() 
\ No newline at end of file
diff --git a/verl/trainer/ppo/ray_async_pipeline_trainer.py b/verl/trainer/ppo/ray_async_pipeline_trainer.py
new file mode 100644
index 00000000000..9df6407ec42
--- /dev/null
+++ b/verl/trainer/ppo/ray_async_pipeline_trainer.py
@@ -0,0 +1,859 @@
+import json
+import os
+import uuid
+import asyncio
+import atexit
+import concurrent.futures
+from collections import OrderedDict
+import sys
+import torch
+import time
+import ray
+from ray.util.queue import Queue
+import numpy as np
+from copy import deepcopy
+from pprint import pprint
+from tqdm import tqdm
+
+from verl.utils.metric import (
+    reduce_metrics,
+)
+from verl.utils.seqlen_balancing import get_seqlen_balanced_partitions, log_seqlen_unbalance
+from verl.trainer.ppo.reward import compute_reward, compute_reward_async
+from verl.trainer.ppo.ray_trainer import (
+    RayPPOTrainer,
+    RayClassWithInitArgs,
+    Role,
+    OmegaConf,
+    create_colocated_worker_cls,
+    compute_data_metrics,
+    compute_throughout_metrics,
+    compute_timing_metrics,
+    process_validation_metrics,
+    marked_timer,
+    compute_advantage,
+    compute_response_mask,
+    agg_loss,
+    AdvantageEstimator,
+    DataProto,
+    apply_kl_penalty,
+)
+from verl.utils.debug import GPUMemoryLogger, log_gpu_memory_usage
+
+from verl.trainer.ppo.pipeline import (
+    AsyncPipeline,
+    enhanced_print,
+    PIPELINE_END_SIGNAL,
+    PIPELINE_START_SINGLE,
+    ROLE_COLORS,
+)
+
+
+class RayPPOAsyncPipelineTrainer(RayPPOTrainer):
+    
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize the RayPPOAsyncPipelineTrainer.
+
+        Args (forwarded unchanged to the parent ``__init__`` via ``*args, **kwargs``):
+            config: Configuration for the trainer.
+            device_name: Name of the device to use.
+            use_critic: Whether to use a critic in training.
+            use_reference_policy: Whether to use a reference policy.
+            ref_in_actor: Whether the reference policy is integrated into the actor.
+            use_rm: Whether to use a reward model.
+            hybrid_engine: Whether to use a hybrid engine for actor and rollout.
+            ray_worker_group_cls: Custom Ray worker group class.
+        """
+        super().__init__(*args, **kwargs)
+        # Feature switches read from config.actor_rollout_ref; both default to True when absent.
+        self._overlap_param_update = self.config.actor_rollout_ref.get("overlap_param_update", True)
+        self._async_logp_ref_logp = self.config.actor_rollout_ref.get("async_logp_ref_logp", True)
+        self._async_pipeline = AsyncPipeline(max_queue_size=self.config.actor_rollout_ref.rollout.get("max_queue_size", 2))        
+        if not self._async_logp_ref_logp:
+            print(f"roles in async pipeline: {self._async_pipeline.role}", flush=True)
+            self._async_pipeline.role.remove("logp")  # drop the logp role when async logp/ref_logp is disabled
+            self._async_pipeline.role.remove("ref_logp")  # drop the ref_logp role as well
+        # Step counters all start at 0; dataloader_global_step is advanced in get_next_batch.
+        self.global_steps = 0
+        self.dataloader_global_step = 0
+        self.generate_global_step = 0
+
+
+    def init_workers(self):
+        """Initialize distributed training workers using Ray backend.
+
+        Creates Ray resource pools and one worker group per configured role, initializes the models
+        (in threads when ``trainer.async_pipeline`` is set), and in that mode also wires actor and
+        rollout together: collective group, shared queues and parameter metadata.
+        """
+        t1 = time.time()
+        self.resource_pool_manager.create_resource_pool()
+
+        self.resource_pool_to_cls = {pool: {} for pool in self.resource_pool_manager.resource_pool_dict.values()}
+
+        # create actor and rollout
+        if self.hybrid_engine:
+            resource_pool = self.resource_pool_manager.get_resource_pool(Role.ActorRollout)
+            actor_rollout_cls = RayClassWithInitArgs(
+                cls=self.role_worker_mapping[Role.ActorRollout],
+                config=self.config.actor_rollout_ref,
+                role="actor_rollout",
+            )
+            self.resource_pool_to_cls[resource_pool]["actor_rollout"] = actor_rollout_cls
+        else:
+            # non-hybrid: actor and rollout get separate worker classes, each in its own role's pool
+            for role, role_name in [(Role.Actor, "actor"), (Role.Rollout, "rollout")]:
+                resource_pool = self.resource_pool_manager.get_resource_pool(role)
+                worker_cls = RayClassWithInitArgs(cls=self.role_worker_mapping[role], config=self.config.actor_rollout_ref, role=role_name)
+                self.resource_pool_to_cls[resource_pool][role_name] = worker_cls
+
+        # create critic
+        if self.use_critic:
+            resource_pool = self.resource_pool_manager.get_resource_pool(Role.Critic)
+            critic_cls = RayClassWithInitArgs(cls=self.role_worker_mapping[Role.Critic], config=self.config.critic)
+            self.resource_pool_to_cls[resource_pool]["critic"] = critic_cls
+
+        # create reference policy if needed
+        if self.use_reference_policy:
+            resource_pool = self.resource_pool_manager.get_resource_pool(Role.RefPolicy)
+            # the reference policy is built from the actor_rollout_ref config with role="ref"
+            ref_policy_cls = RayClassWithInitArgs(self.role_worker_mapping[Role.RefPolicy], config=self.config.actor_rollout_ref, role="ref")
+            self.resource_pool_to_cls[resource_pool]["ref"] = ref_policy_cls
+
+        t2 = time.time()
+        print(f"===== finished creating resource pools and worker classes in {t2 - t1:.2f} seconds =====", flush=True)
+
+        # create a reward model if reward_fn is None
+        if self.use_rm:
+            # we create a RM here
+            resource_pool = self.resource_pool_manager.get_resource_pool(Role.RewardModel)
+            rm_cls = RayClassWithInitArgs(self.role_worker_mapping[Role.RewardModel], config=self.config.reward_model)
+            self.resource_pool_to_cls[resource_pool]["rm"] = rm_cls
+
+        # initialize WorkerGroup
+        # NOTE: if you want to use a different resource pool for each role, which can support different parallel size,
+        # you should not use `create_colocated_worker_cls`.
+        # Instead, directly pass different resource pool to different worker groups.
+        # See https://github.com/volcengine/verl/blob/master/examples/ray/tutorial.ipynb for more information.
+        all_wg = {}
+        wg_kwargs = {}  # Setting up kwargs for RayWorkerGroup
+        if OmegaConf.select(self.config.trainer, "ray_wait_register_center_timeout") is not None:
+            wg_kwargs["ray_wait_register_center_timeout"] = self.config.trainer.ray_wait_register_center_timeout
+
+        # trainer.async_pipeline switches on threaded model init and the actor<->rollout wiring below
+        
+        async_pipline_init = self.config.trainer.get("async_pipeline", False)
+        
+        _executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
+        atexit.register(_executor.shutdown, wait=True)
+        _async_tasks = []
+
+        for resource_pool, class_dict in self.resource_pool_to_cls.items():
+            t1 = time.time()
+            worker_dict_cls = create_colocated_worker_cls(class_dict=class_dict)
+            wg_dict = self.ray_worker_group_cls(resource_pool=resource_pool, ray_cls_with_init=worker_dict_cls, device_name=self.device_name, **wg_kwargs)
+            spawn_wg = wg_dict.spawn(prefix_set=class_dict.keys())
+            all_wg.update(spawn_wg)
+            t2 = time.time()
+            print(f"using resource pool {resource_pool} cost time:{t2 - t1:.2f}s", flush=True)
+        
+        t1 = time.time()
+        
+        if self.use_critic:
+            self.critic_wg = all_wg["critic"]
+            self.critic_wg.init_model()
+
+        if self.use_reference_policy and not self.ref_in_actor:
+            self.ref_policy_wg = all_wg["ref"]
+            if async_pipline_init:
+                _async_tasks.append(_executor.submit(self.ref_policy_wg.init_model))
+            else:
+                self.ref_policy_wg.init_model()
+
+        if self.use_rm:
+            self.rm_wg = all_wg["rm"]
+            self.rm_wg.init_model()
+
+        t2 = time.time()
+
+        if "actor_rollout" in all_wg:
+            # we should create rollout at the end so that vllm can have a better estimation of kv cache memory
+            self.actor_rollout_wg = all_wg["actor_rollout"]
+            self.actor_rollout_wg.init_model()
+        else:
+            # if we are not using hybrid engine, we should create actor and rollout separately
+            self.actor_wg = all_wg["actor"]
+            self.rollout_wg = all_wg["rollout"]
+            
+            if async_pipline_init:
+                print(f"===== initializing actor worker asynchronously =====", flush=True)
+                # Use the executor to run the actor initialization in a separate thread
+                _async_tasks.append(_executor.submit(self.actor_wg.init_model))
+                # Use the executor to run the rollout initialization in a separate thread
+                _async_tasks.append(_executor.submit(self.rollout_wg.init_model))
+            else:
+                self.actor_wg.init_model()
+                self.rollout_wg.init_model()
+
+        # wait for all async tasks to finish (task.result() re-raises any exception from init_model)
+        if len(_async_tasks) > 0:
+            print(f"===== waiting for async tasks to finish =====", flush=True)
+            for task in _async_tasks:
+                task.result()
+
+        if async_pipline_init:  # NOTE(review): uses self.actor_wg/self.rollout_wg, assigned only in the non-hybrid branch above
+            members = self.actor_wg.workers + self.rollout_wg.workers
+            col_size = len(members)
+            col_name = "actor_rollout_sync"
+            col_ranks = list(range(col_size))
+            
+            comm_type = getattr(self.config.actor_rollout_ref, "comm_type", "ray")  # "ray" or "pytorch"
+            backend = getattr(self.config.actor_rollout_ref, "comm_backend", "nccl")    # "nccl" or "gloo"
+            
+            if comm_type == "ray":
+                # use Ray Collective (support NCCL)
+                import ray.util.collective as col
+                col.create_collective_group(
+                    actors=members,
+                    world_size=col_size,
+                    ranks=col_ranks,
+                    backend=backend,
+                    group_name=col_name
+                )
+                print(f"Created Ray Collective {backend.upper()} group: {col_name}")
+            elif comm_type == "pytorch":
+                # use PyTorch distributed, no need to initialize Ray Collective
+                print(f"Using PyTorch {backend} backend, skipping Ray Collective initialization")
+            else:
+                print(f"Unsupported comm_type: {comm_type}")  # only reported; the setup below still runs
+            
+            # setup worker communication config: actor ranks start at 0, rollout ranks start at actor_len
+            actor_len = len(self.actor_wg.workers)
+            self.actor_wg.setup_for_ray_col(0, col_size, col_name, backend)
+            if actor_len != col_size:
+                self.rollout_wg.setup_for_ray_col(actor_len, col_size, col_name, backend)
+            
+            self.actor_wg.check_for_ray_col(col_name)
+            if actor_len != col_size:
+                self.rollout_wg.check_for_ray_col(col_name)
+            
+            # share global queue: one Queue per rollout engine, handed to both worker groups
+            from ray.util.queue import Queue
+            engine_tp_size = self.config.actor_rollout_ref.rollout.tensor_model_parallel_size
+            # TODO: support PP/EP
+            engine_nums = (col_size - actor_len) // engine_tp_size
+            self._global_queue = [Queue() for _ in range(engine_nums)]
+            self.actor_wg.setup_for_queue(self._global_queue)
+            self.rollout_wg.setup_for_queue(self._global_queue)
+
+            # sync param_meta
+            param_meta = self.actor_wg.get_params_meta()
+            self.rollout_wg.set_params_meta(param_meta)
+        
+        t3 = time.time()
+        print(f"===== finished async_pipline:{async_pipline_init} initializing workers in {t3 - t2:.2f},{t2-t1:.2f} seconds =====", flush=True)
+
+        # create async rollout manager and request scheduler
+        self.async_rollout_mode = False
+        if self.config.actor_rollout_ref.rollout.mode == "async":  # NOTE(review): needs self.actor_rollout_wg (hybrid branch only)
+            self.async_rollout_mode = True
+            from verl.workers.rollout.async_server import AsyncLLMServerManager
+            self.async_rollout_manager = AsyncLLMServerManager(
+                config=self.config.actor_rollout_ref,
+                worker_group=self.actor_rollout_wg,
+            )
+
+
+    def _balance_batch(self, batch: DataProto, metrics, logging_prefix="global_seqlen"):
+        """Reorder the data on single controller such that each dp rank gets similar total tokens"""
+        attention_mask = batch.batch["attention_mask"]
+        batch_size = attention_mask.shape[0]
+        global_seqlen_lst = batch.batch["attention_mask"].view(batch_size, -1).sum(-1).tolist()  # (train_batch_size,)
+        world_size = self.actor_wg.world_size  # number of partitions to balance across
+        global_partition_lst = get_seqlen_balanced_partitions(global_seqlen_lst, k_partitions=world_size, equal_size=True)
+        # reorder based on index. The data will be automatically equally partitioned by dispatch function
+        global_idx = torch.tensor([j for partition in global_partition_lst for j in partition])
+        batch.reorder(global_idx)
+        global_balance_stats = log_seqlen_unbalance(seqlen_list=global_seqlen_lst, partitions=global_partition_lst, prefix=logging_prefix)
+        metrics.update(global_balance_stats)  # balance statistics are merged into the caller's metrics
+
+    async def sync_weight(self, sync_thread=False):  # note: the body contains no await
+        """Call sync_per_tensor_generator() on actor, then rollout; sync_thread=True also ray.get()s the rollout result."""
+        if sync_thread:
+            self.actor_wg.sync_per_tensor_generator()
+            ray.get(self.rollout_wg.sync_per_tensor_generator())  # ray.get() waits on what the rollout call returned
+        else:
+            self.actor_wg.sync_per_tensor_generator()
+            self.rollout_wg.sync_per_tensor_generator()  # return value is not waited on in this branch
+
+
+    async def rollout(self):
+        """Placeholder loop: prints a message and sleeps one second, forever."""
+        while True:
+            print("Performing rollout...")
+            await asyncio.sleep(1)
+
+    def _pre_process_batch(self, batch: DataProto):  # returns the generation sub-batch popped out of `batch`
+        # Keys that batch.pop() below moves out of `batch` for generation.
+        batch_keys_to_pop = ["input_ids", "attention_mask", "position_ids"]
+        non_tensor_batch_keys_to_pop = ["raw_prompt_ids"]
+        if "multi_modal_data" in batch.non_tensor_batch:
+            non_tensor_batch_keys_to_pop.append("multi_modal_data")
+        if "raw_prompt" in batch.non_tensor_batch:
+            non_tensor_batch_keys_to_pop.append("raw_prompt")
+        if "tools_kwargs" in batch.non_tensor_batch:
+            non_tensor_batch_keys_to_pop.append("tools_kwargs")
+        _gen_batch = batch.pop(
+            batch_keys=batch_keys_to_pop,
+            non_tensor_batch_keys=non_tensor_batch_keys_to_pop,
+        )
+        
+        # Repeat here in the trainer: rollout.n copies of each entry, interleaved, when n > 1.
+        if self.config.actor_rollout_ref.rollout.n > 1:
+            _gen_batch = _gen_batch.repeat(repeat_times=self.config.actor_rollout_ref.rollout.n, interleave=True)
+        return _gen_batch
+
+    def get_next_batch(self):
+        """Yield (step, gen_batch, batch_dict) for every training batch, then one END sentinel.
+
+        ``step`` is ``self.dataloader_global_step`` after it has been incremented; the final item
+        is ``(-1, PIPELINE_END_SIGNAL, PIPELINE_END_SIGNAL)``.
+        """
+        for epoch in range(self.config.trainer.total_epochs):
+            self.epoch = epoch
+            for batch_dict in self.train_dataloader:
+                batch: DataProto = DataProto.from_single_dict(batch_dict)
+                # Split off the generation inputs (repeated rollout.n times inside the helper).
+                gen_batch = self._pre_process_batch(batch)
+                self.dataloader_global_step += 1
+                yield self.dataloader_global_step, gen_batch, batch_dict
+        yield -1, PIPELINE_END_SIGNAL, PIPELINE_END_SIGNAL
+            
+    async def dataloader_loop(self):
+        """Wait for train_loop's start signal, then feed batches to the train and rollout queues until the data runs out."""
+        dataloader_batch_iter = self.get_next_batch()
+        pipeline_start = await self._async_pipeline.pull(src_role="train", dst_role="dataloader")
+        print(f"[dataloader] loop started with pipeline_start: {pipeline_start}, {pipeline_start == PIPELINE_START_SINGLE}")
+        if pipeline_start != PIPELINE_START_SINGLE:
+            # Without the start signal the loop body used to be skipped on every iteration and never
+            # awaited, i.e. it spun forever and starved every other coroutine on the event loop.
+            return
+        max_pending_size = 2  # back-pressure: stop feeding while this many items wait in the train->rollout queue
+        while True:
+            cur_queue = self._async_pipeline.get_cur_queue(src_role="train", dst_role="rollout")
+            if cur_queue.qsize() >= max_pending_size:
+                # rollout is behind; poll again in a second instead of queueing more work
+                await asyncio.sleep(1)
+                continue
+
+            cur_global_steps, gen_batch, batch_dict = next(dataloader_batch_iter)
+            if gen_batch == PIPELINE_END_SIGNAL:
+                print("dataloader loop finished.")
+                await self._async_pipeline.push(src_role="dataloader", dst_role="train", data=PIPELINE_END_SIGNAL)
+                break
+            # the raw batch goes to the trainer, the generation inputs go to rollout, both tagged with the same step
+            await self._async_pipeline.push(src_role="dataloader", dst_role="train", data=(cur_global_steps, batch_dict))
+            await self._async_pipeline.push(src_role="train", dst_role="rollout", data=(cur_global_steps, gen_batch))
+            next_queue = self._async_pipeline.get_cur_queue(src_role="train", dst_role="rollout")
+            print(f"[dataloader] Pushed step:{cur_global_steps}, gs:{self.global_steps} batch to train queue. Next queue size: {next_queue.qsize()}")
+
+
+    async def rollout_generate(self):
+        """Rollout stage of the async pipeline.
+
+        Repeatedly pulls ``(step, gen_batch)`` from the train->rollout queue, generates sequences in a worker
+        thread, pushes the output to train and reports the updated generation counter to param_update.
+        """
+        pipeline = self._async_pipeline
+        while not pipeline.is_complete(src_role="rollout", dst_role="train"):
+            # check update-param before generate: consume one pending notification, if there is any
+            if pipeline.get_cur_queue(src_role="param_update", dst_role="rollout").qsize() > 0:
+                cur_model_step = await pipeline.pull(src_role="param_update", dst_role="rollout")
+                print(f"[rollout] Current model step: {cur_model_step}, global steps: {self.global_steps}, generate_global_step:{self.generate_global_step}")
+
+            inbound = pipeline.get_cur_queue(src_role="train", dst_role="rollout")
+            print(f"[rollout] Waiting for training data in the queue... Current queue size: {inbound.qsize()}")
+            step, gen_batch = await pipeline.pull(src_role="train", dst_role="rollout")
+
+            outbound = pipeline.get_cur_queue(src_role="rollout", dst_role="train")
+            # asyncio.to_thread runs the call in a worker thread, so other coroutines keep running meanwhile
+            gen_batch_output = await asyncio.to_thread(self.rollout_wg.generate_sequences, gen_batch)
+
+            print(f"[rollout] Sending rollout data to train queue, Next queue size: {outbound.qsize()}")
+            await pipeline.push(src_role="rollout", dst_role="train", data=gen_batch_output)
+            self.generate_global_step += 1
+            await pipeline.push(src_role="rollout", dst_role="param_update", data=self.generate_global_step)
+
+            print(f"[rollout] Step {step}: Sent rollout data to train queue size: {outbound.qsize()}")
+        else:
+            print("[rollout] Pipeline is complete, exiting rollout_generate.")
+
+
+    async def rollout_mock(self, mock_data=True):
+        """Five-step stand-in for the rollout stage.
+
+        Each step pulls one item from the train->rollout queue, builds rollout data and
+        pushes it to the rollout->train queue.
+
+        Args:
+            mock_data: when True, fabricate random tensors instead of calling the rollout
+                worker group; when False, call ``self.rollout_wg.generate_sequences``.
+        """
+        for step_idx in range(5):
+            step_no = step_idx + 1
+            print("Waiting for training data in the queue...")
+            train_data = await self._async_pipeline.pull(src_role="train", dst_role="rollout")
+            print(f"[Rollout] Step {step_no}: Received rollout data from train queue")
+
+            if not mock_data:
+                # called directly, i.e. not via asyncio.to_thread
+                rollout_data = self.rollout_wg.generate_sequences(train_data)
+            else:
+                # random stand-ins with hard-coded sizes
+                rollout_data = {
+                    "responses": torch.randint(0, 2, (8, 10)),  # mock generated responses
+                    "prompts": train_data["input_ids"],  # use training data input as prompts
+                    "scores": torch.rand(8),  # mock scores
+                }
+
+            print(f"[Rollout] Step {step_no}: Sending rollout data to train queue")
+            await self._async_pipeline.push(src_role="rollout", dst_role="train", data=rollout_data)
+
+    async def rollout_logp(self):
+        """Log-prob stage: pull a batch from train, run the actor group's ``compute_log_prob``, push the result back."""
+        pipeline = self._async_pipeline
+        while not pipeline.is_complete(src_role="logp", dst_role="train"):
+            pending = pipeline.get_cur_queue(src_role="train", dst_role="logp")
+            print(f"[Logp] Waiting for training data in the queue... Current queue size: {pending.qsize()}")
+            batch = await pipeline.pull(src_role="train", dst_role="logp")
+
+            # called directly, i.e. not via asyncio.to_thread
+            old_log_prob = self.actor_wg.compute_log_prob(batch)
+
+            await pipeline.push(src_role="logp", dst_role="train", data=old_log_prob)
+        else:
+            # the loop has no break, so this always runs on exit: the completion check
+            # at the top of an iteration reported True
+            print("[Logp] Pipeline is complete, exiting rollout_logp.")
+
+
+    async def rollout_ref_logp(self):
+        """Reference log-prob stage; returns immediately when no reference policy is used.
+
+        Each iteration pulls a batch from the train->ref_logp queue, runs ``compute_ref_log_prob``
+        and pushes the result to the ref_logp->train queue.
+        """
+        if not self.use_reference_policy:
+            return
+
+        pipeline = self._async_pipeline
+        while True:
+            if pipeline.is_complete(src_role="ref_logp", dst_role="train"):
+                print("[Ref Logp] Pipeline is complete, exiting rollout_ref_logp.")
+                return
+
+            pending = pipeline.get_cur_queue(src_role="train", dst_role="ref_logp")
+            print(f"[Ref Logp] Waiting for training data in the queue... Current queue size: {pending.qsize()}")
+            batch = await pipeline.pull(src_role="train", dst_role="ref_logp")
+
+            # ref_in_actor selects which worker group answers the request:
+            # the combined actor/rollout group or the dedicated reference-policy group
+            if self.ref_in_actor:
+                ref_log_prob = self.actor_rollout_wg.compute_ref_log_prob(batch)
+            else:
+                ref_log_prob = self.ref_policy_wg.compute_ref_log_prob(batch)
+
+            await pipeline.push(
+                src_role="ref_logp",
+                dst_role="train",
+                data=ref_log_prob,
+            )
+
+    async def rollout_reward_fn(self):
+        """Reward stage of the async pipeline.
+
+        Each iteration pulls a batch from the train->reward queue, optionally merges the reward-model
+        score into it, evaluates the reward function and pushes ``(reward_tensor, extra_infos)`` to train.
+        """
+        pipeline = self._async_pipeline
+        while not pipeline.is_complete(src_role="reward", dst_role="train"):
+            batch = await pipeline.pull(src_role="train", dst_role="reward")
+
+            if self.use_rm:
+                # compute reward model score and merge it into the batch first
+                rm_scores = self.rm_wg.compute_rm_score(batch)
+                batch = batch.union(rm_scores)
+
+            if not self.config.reward_model.launch_reward_fn_async:
+                reward_result = compute_reward(batch, self.reward_fn)
+            else:
+                # launched as a remote task, but waited for right away via ray.get
+                reward_result = ray.get(compute_reward_async.remote(batch, self.config, self.tokenizer))
+            reward_tensor, reward_extra_infos_dict = reward_result
+
+            # push to the train queue
+            await pipeline.push(src_role="reward", dst_role="train", data=(reward_tensor, reward_extra_infos_dict))
+            print(f"[Reward] Step {self.global_steps}: Sent reward to train queue.")
+        else:
+            print("[Reward] Pipeline is complete, exiting rollout_reward_fn.")
+
+    async def param_update_loop(self):
+        """Parameter-update stage: pair one finished train step with one finished rollout step.
+
+        Weights are synced only when ``model_step <= rollout_step``; the model step is sent to rollout either way.
+        """
+        pipeline = self._async_pipeline
+        while not pipeline.is_complete(src_role="param_update", dst_role="train"):
+            # wait for param update: train-step-done -> syncing -> done
+            model_step = await pipeline.pull(src_role="train", dst_role="param_update")
+            rollout_step = await pipeline.pull(src_role="rollout", dst_role="param_update")
+            print(f"[Param Update] Received model step: {model_step}, rollout step: {rollout_step}")
+
+            if model_step <= rollout_step:
+                # sync weights to all workers
+                await self.sync_weight()
+            # notify rollout_generate, which checks this queue before each generation
+            await pipeline.push(src_role="param_update", dst_role="rollout", data=model_step)
+            print(f"[Param Update] step:{model_step} Parameters updated.")
+        else:
+            print("[Param Update] Pipeline is complete, exiting param_update_loop.")
+            
+    async def train_loop(self):
+        """Driver-side training loop of the async pipeline.
+
+        Each iteration pulls one raw batch from the dataloader stage and one rollout output, computes
+        rewards, log-probs and advantages, runs the critic/actor updates and logs the step metrics.
+        Returns after the last training step, or as soon as the dataloader stage signals that it has
+        run out of data.
+        """
+        from omegaconf import OmegaConf
+
+        from verl.utils.tracking import Tracking
+
+        logger = Tracking(
+            project_name=self.config.trainer.project_name,
+            experiment_name=self.config.trainer.experiment_name,
+            default_backend=self.config.trainer.logger,
+            config=OmegaConf.to_container(self.config, resolve=True),
+        )
+
+        # load checkpoint before doing anything
+        self._load_checkpoint()
+
+        # TODO: validation before training (self._validate(), val_before_train / val_only) is disabled in this loop.
+
+        # add tqdm
+        progress_bar = tqdm(total=self.total_training_steps, initial=self.global_steps, desc="Training Progress")
+
+        # update first param?
+        await self._async_pipeline.push(src_role="train", dst_role="param_update", data=self.global_steps)
+
+        # we start from step 1
+        self.global_steps += 1
+        last_val_metrics = None
+
+        # release dataloader_loop, which waits for this start signal before producing batches
+        await self._async_pipeline.push(src_role="train", dst_role="dataloader", data=PIPELINE_START_SINGLE)
+
+        if True:  # async-rl is a loop, so we don't need to loop over epochs and batches
+            while True:
+                metrics = {}
+                timing_raw = {}
+
+                # async get batch from the dataloader
+                dataloader_item = await self._async_pipeline.pull(src_role="dataloader", dst_role="train")
+                if dataloader_item == PIPELINE_END_SIGNAL:
+                    # dataloader_loop pushes the bare signal (not a 2-tuple) once the data is exhausted;
+                    # unpacking it below would fail, so finish cleanly instead.
+                    print("[Train] Dataloader signalled end of data before the last training step, exiting train_loop.")
+                    progress_bar.close()
+                    return
+                cur_global_steps, batch_dict = dataloader_item
+                batch: DataProto = DataProto.from_single_dict(batch_dict)
+                # need to repeat pre-process part
+                _gen_batch = self._pre_process_batch(batch)
+
+                is_last_step = self.global_steps >= self.total_training_steps
+
+                with marked_timer("step", timing_raw):
+                    # async rollout in the background
+                    with marked_timer("gen", timing_raw):
+                        # get the batch from the queue
+                        gen_batch_output = await self._async_pipeline.pull(src_role="rollout", dst_role="train")
+
+                    # one uid per prompt, assigned before the repeat below
+                    batch.non_tensor_batch["uid"] = np.array([str(uuid.uuid4()) for _ in range(len(batch.batch))], dtype=object)
+                    # repeat to align with repeated responses in rollout
+                    batch = batch.repeat(repeat_times=self.config.actor_rollout_ref.rollout.n, interleave=True)
+                    batch = batch.union(gen_batch_output)
+
+                    batch.batch["response_mask"] = compute_response_mask(batch)
+                    # Balance the number of valid tokens across DP ranks.
+                    # NOTE: This usually changes the order of data in the `batch`,
+                    # which won't affect the advantage calculation (since it's based on uid),
+                    # but might affect the loss calculation (due to the change of mini-batching).
+                    # TODO: Decouple the DP balancing and mini-batching.
+                    if self.config.trainer.balance_batch:
+                        self._balance_batch(batch, metrics=metrics)
+
+                    # compute global_valid tokens
+                    batch.meta_info["global_token_num"] = torch.sum(batch.batch["attention_mask"], dim=-1).tolist()
+
+                    with marked_timer("reward", timing_raw):
+                        if self._async_pipeline.is_in_pipeline("reward"):
+
+                            await self._async_pipeline.push("train", "reward", batch)
+
+                            # TODO: lazy load reward_fn
+                            # reward_tensor, reward_extra_infos_dict = await self._async_pipeline.pull(src_role="reward", dst_role="train")
+
+                        else:
+                            # compute reward model score
+                            if self.use_rm:
+                                reward_tensor = self.rm_wg.compute_rm_score(batch)
+                                batch = batch.union(reward_tensor)
+
+                            if self.config.reward_model.launch_reward_fn_async:
+                                future_reward = compute_reward_async.remote(batch, self.config, self.tokenizer)
+                            else:
+                                reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.reward_fn)
+
+                    # hand the batch to the optional logp / ref_logp stages before waiting on any of their results
+                    if self._async_pipeline.is_in_pipeline("logp"):
+                        await self._async_pipeline.push(src_role="train", dst_role="logp", data=batch)
+                    if self._async_pipeline.is_in_pipeline("ref_logp"):
+                        if self.use_reference_policy:
+                            await self._async_pipeline.push(src_role="train", dst_role="ref_logp", data=batch)
+
+                    # recompute old_log_probs
+                    with marked_timer("old_log_prob", timing_raw):
+                        if not self._async_pipeline.is_in_pipeline("logp"):
+                            # old_log_prob = self.actor_rollout_wg.compute_log_prob(batch)
+                            old_log_prob = self.actor_wg.compute_log_prob(batch)
+                        else:
+                            old_log_prob = await self._async_pipeline.pull(src_role="logp", dst_role="train")
+
+                        entropys = old_log_prob.batch["entropys"]
+                        response_masks = batch.batch["response_mask"]
+                        loss_agg_mode = self.config.actor_rollout_ref.actor.loss_agg_mode
+                        entropy_loss = agg_loss(loss_mat=entropys, loss_mask=response_masks, loss_agg_mode=loss_agg_mode)
+                        old_log_prob_metrics = {"actor/entropy_loss": entropy_loss.detach().item()}
+                        metrics.update(old_log_prob_metrics)
+                        old_log_prob.batch.pop("entropys")
+                        batch = batch.union(old_log_prob)
+
+                        if "rollout_log_probs" in batch.batch.keys():
+                            # TODO: we may want to add diff of probs too.
+                            rollout_old_log_probs = batch.batch["rollout_log_probs"]
+                            actor_old_log_probs = batch.batch["old_log_probs"]
+                            attention_mask = batch.batch["attention_mask"]
+                            responses = batch.batch["responses"]
+                            response_length = responses.size(1)
+                            response_mask = attention_mask[:, -response_length:]
+
+                            rollout_probs = torch.exp(rollout_old_log_probs)
+                            actor_probs = torch.exp(actor_old_log_probs)
+                            rollout_probs_diff = torch.abs(rollout_probs - actor_probs)
+                            rollout_probs_diff = torch.masked_select(rollout_probs_diff, response_mask.bool())
+                            rollout_probs_diff_max = torch.max(rollout_probs_diff)
+                            rollout_probs_diff_mean = torch.mean(rollout_probs_diff)
+                            rollout_probs_diff_std = torch.std(rollout_probs_diff)
+                            metrics.update(
+                                {
+                                    "training/rollout_probs_diff_max": rollout_probs_diff_max.detach().item(),
+                                    "training/rollout_probs_diff_mean": rollout_probs_diff_mean.detach().item(),
+                                    "training/rollout_probs_diff_std": rollout_probs_diff_std.detach().item(),
+                                }
+                            )
+
+                    if self.use_reference_policy:
+                        # compute reference log_prob
+                        with marked_timer("ref", timing_raw):
+                            if self._async_pipeline.is_in_pipeline("ref_logp"):
+                                ref_log_prob = await self._async_pipeline.pull(src_role="ref_logp", dst_role="train")
+                            else:
+                                if not self.ref_in_actor:
+                                    ref_log_prob = self.ref_policy_wg.compute_ref_log_prob(batch)
+                                else:
+                                    ref_log_prob = self.actor_rollout_wg.compute_ref_log_prob(batch)
+                            batch = batch.union(ref_log_prob)
+
+                    # compute values
+                    if self.use_critic:
+                        with marked_timer("values", timing_raw):
+                            values = self.critic_wg.compute_values(batch)
+                            batch = batch.union(values)
+
+                    with marked_timer("adv", timing_raw):
+                        # we combine with rule-based rm
+                        reward_extra_infos_dict: dict[str, list]
+                        if self._async_pipeline.is_in_pipeline("reward"):
+                            reward_tensor, reward_extra_infos_dict = await self._async_pipeline.pull(src_role="reward", dst_role="train")
+                        else:
+                            # otherwise the synchronous compute_reward() call in the reward section already set both names
+                            if self.config.reward_model.launch_reward_fn_async:
+                                reward_tensor, reward_extra_infos_dict = ray.get(future_reward)
+
+                        batch.batch["token_level_scores"] = reward_tensor
+
+                        # print(f"{list(reward_extra_infos_dict.keys())=}")
+                        if reward_extra_infos_dict:
+                            batch.non_tensor_batch.update({k: np.array(v) for k, v in reward_extra_infos_dict.items()})
+
+                        # compute rewards. apply_kl_penalty if available
+                        if self.config.algorithm.use_kl_in_reward:
+                            batch, kl_metrics = apply_kl_penalty(batch, kl_ctrl=self.kl_ctrl_in_reward, kl_penalty=self.config.algorithm.kl_penalty)
+                            metrics.update(kl_metrics)
+                        else:
+                            batch.batch["token_level_rewards"] = batch.batch["token_level_scores"]
+
+                        # compute advantages, executed on the driver process
+
+                        norm_adv_by_std_in_grpo = self.config.algorithm.get("norm_adv_by_std_in_grpo", True)  # GRPO adv normalization factor
+
+                        batch = compute_advantage(
+                            batch,
+                            adv_estimator=self.config.algorithm.adv_estimator,
+                            gamma=self.config.algorithm.gamma,
+                            lam=self.config.algorithm.lam,
+                            num_repeat=self.config.actor_rollout_ref.rollout.n,
+                            norm_adv_by_std_in_grpo=norm_adv_by_std_in_grpo,
+                            multi_turn=self.config.actor_rollout_ref.rollout.multi_turn.enable,
+                            use_pf_ppo=self.config.algorithm.use_pf_ppo,
+                            pf_ppo_reweight_method=self.config.algorithm.pf_ppo.reweight_method,
+                            pf_ppo_weight_pow=self.config.algorithm.pf_ppo.weight_pow,
+                        )
+
+                    # update critic
+                    if self.use_critic:
+                        with marked_timer("update_critic", timing_raw):
+                            critic_output = self.critic_wg.update_critic(batch)
+                        critic_output_metrics = reduce_metrics(critic_output.meta_info["metrics"])
+                        metrics.update(critic_output_metrics)
+
+                    # implement critic warmup
+                    if self.config.trainer.critic_warmup <= self.global_steps:
+                        # update actor
+                        with marked_timer("update_actor", timing_raw):
+                            batch.meta_info["multi_turn"] = self.config.actor_rollout_ref.rollout.multi_turn.enable
+                            # print(f"batch_size:{batch.batch.batch_size[0]}")
+
+                            actor_output = self.actor_wg.update_actor(batch)
+
+                            # tell param_update_loop that the actor update of this step is done
+                            await self._async_pipeline.push(src_role="train", dst_role="param_update", data=self.global_steps)
+
+                        actor_output_metrics = reduce_metrics(actor_output.meta_info["metrics"])
+                        metrics.update(actor_output_metrics)
+
+                    t_post1 = time.time()
+                    # Log rollout generations if enabled
+                    rollout_data_dir = self.config.trainer.get("rollout_data_dir", None)
+                    if rollout_data_dir:
+                        with marked_timer("dump_rollout_generations", timing_raw):
+                            print(batch.batch.keys())
+                            inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
+                            outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
+                            scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
+                            self._dump_generations(
+                                inputs=inputs,
+                                outputs=outputs,
+                                scores=scores,
+                                reward_extra_infos_dict=reward_extra_infos_dict,
+                                dump_path=rollout_data_dir,
+                            )
+
+                    # TODO: periodic validation (self._validate() every trainer.test_freq steps and on the last step)
+                    # is disabled in this loop, so last_val_metrics stays None.
+
+                    if self.config.trainer.save_freq > 0 and (is_last_step or self.global_steps % self.config.trainer.save_freq == 0):
+                        with marked_timer("save_checkpoint", timing_raw):
+                            worker = self.actor_rollout_wg if "actor_rollout" in self.resource_pool_to_cls else self.actor_wg
+                            self._save_checkpoint(worker)
+
+                    t_post2 = time.time()
+                    print(f"[Train] Step {self.global_steps}: Post-processing took {t_post2 - t_post1:.2f}s")
+
+                # training metrics
+                metrics.update(
+                    {
+                        "training/global_step": self.global_steps,
+                        "training/epoch": self.epoch,
+                    }
+                )
+                # collect metrics
+                metrics.update(compute_data_metrics(batch=batch, use_critic=self.use_critic))
+                metrics.update(compute_timing_metrics(batch=batch, timing_raw=timing_raw))
+                # TODO: implement actual tflpo and theoretical tflpo
+                n_gpus = self.resource_pool_manager.get_n_gpus()
+                metrics.update(compute_throughout_metrics(batch=batch, timing_raw=timing_raw, n_gpus=n_gpus))
+
+                # TODO: make a canonical logger that supports various backend
+                logger.log(data=metrics, step=self.global_steps)
+
+                progress_bar.update(1)
+                self.global_steps += 1
+                if is_last_step:
+                    pprint(f"Final validation metrics: {last_val_metrics}")
+                    progress_bar.close()
+                    return
+
+    async def train_mock(self):
+        """Five-step stand-in for the training stage: push a random batch to rollout, then wait for its result."""
+        for step_idx in range(5):
+            step_no = step_idx + 1
+            # mock training logic
+            train_batch = {
+                "input_ids": torch.randint(0, 2, (8, 10)),  # mock input
+                "labels": torch.randint(0, 2, (8, 10)),  # mock label
+            }
+            print(f"[Training] Step {step_no}: Sending batch to rollout queue")
+            await self._async_pipeline.push('train', 'rollout', train_batch)
+
+            # mock wait for a while
+            await asyncio.sleep(1)
+
+            result = await self._async_pipeline.pull('rollout', 'train')  # mock get result from rollout queue
+            print(f"[Training] Step {step_no}: Received result from rollout queue: {result}")
+            
+    async def fit_async(self):
+        """Run all seven pipeline stages concurrently with ``asyncio.gather``.
+
+        Deprecated according to its author: simple, but the overlap logic is easy to get confused by;
+        the state-machine task loop started by ``fit`` is used instead. Returns once every stage has
+        finished; the first exception raised by a stage propagates to the caller.
+        """
+        # Data flow between the stages (all via self._async_pipeline queues):
+        #   dataloader_loop -> train_loop and rollout_generate; train_loop -> reward / logp / ref_logp stages;
+        #   train_loop and rollout_generate -> param_update_loop -> rollout_generate.
+        stage_coroutines = [
+            self.dataloader_loop(),
+            self.train_loop(),
+            self.rollout_generate(),
+            self.rollout_reward_fn(),
+            self.rollout_logp(),
+            self.rollout_ref_logp(),
+            self.param_update_loop(),
+        ]
+        await asyncio.gather(*stage_coroutines)
+
+    def fit(self, use_blocking_mode=False):
+        """Synchronous entry point: wrap this trainer in an ``AsyncTrainingFlow`` and run its state-machine pipeline.
+
+        Args:
+            use_blocking_mode: selects the flow's mode (default False).
+                True: sync mode using nccl for parameter sync; deprecated by the author because
+                    thread safety rules out asynchronous parameter sync in this mode.
+                False: pure async mode, using cpu for asynchronous parameter sync.
+        """
+        if use_blocking_mode:
+            mode_name = "blocking"
+        else:
+            mode_name = "async"
+        print(f"Starting async fit with {mode_name} mode...")
+
+        from verl.trainer.ppo.pipeline import AsyncTrainingFlow
+
+        flow = AsyncTrainingFlow(self, use_async_rl=not use_blocking_mode)
+        asyncio.run(flow.run_state_machine_pipeline())
diff --git a/verl/trainer/ppo/ray_trainer.py b/verl/trainer/ppo/ray_trainer.py
index 6f85a289731..08f289742e4 100644
--- a/verl/trainer/ppo/ray_trainer.py
+++ b/verl/trainer/ppo/ray_trainer.py
@@ -345,7 +345,6 @@ def __init__(
         self.val_reward_fn = val_reward_fn
 
         self.hybrid_engine = config.actor_rollout_ref.hybrid_engine
-        assert self.hybrid_engine, "Currently, only support hybrid engine"
 
         if self.hybrid_engine:
             assert Role.ActorRollout in role_worker_mapping, f"{role_worker_mapping.keys()=}"
@@ -369,7 +368,7 @@ def __init__(
         if self.config.algorithm.use_kl_in_reward:
             self.kl_ctrl_in_reward = core_algos.get_kl_controller(self.config.algorithm.kl_ctrl)
 
-        if config.critic.enable is not None:
+        if hasattr(config.critic, 'enable') and config.critic.enable is not None:
             self.use_critic = bool(config.critic.enable)
         elif self.config.algorithm.adv_estimator == AdvantageEstimator.GAE:
             self.use_critic = True
@@ -523,7 +522,7 @@ def _create_dataloader(self, train_dataset, val_dataset, collate_fn, train_sampl
 
             collate_fn = default_collate_fn
 
-        num_workers = self.config.data["dataloader_num_workers"]
+        num_workers = self.config.data.get("dataloader_num_workers", 8)
 
         self.train_dataloader = StatefulDataLoader(
             dataset=self.train_dataset,
@@ -833,7 +832,7 @@ def init_workers(self):
         wg_kwargs = {}  # Setting up kwargs for RayWorkerGroup
         if OmegaConf.select(self.config.trainer, "ray_wait_register_center_timeout") is not None:
             wg_kwargs["ray_wait_register_center_timeout"] = self.config.trainer.ray_wait_register_center_timeout
-        if OmegaConf.select(self.config.global_profiler, "steps") is not None:
+        if hasattr(self.config, 'global_profiler') and OmegaConf.select(self.config.global_profiler, "steps") is not None:
             wg_kwargs["profile_steps"] = OmegaConf.select(self.config.global_profiler, "steps")
             assert (
                 OmegaConf.select(self.config.global_profiler.global_tool_config.nsys, "worker_nsight_options")
@@ -870,6 +869,8 @@ def init_workers(self):
         self.actor_rollout_wg = all_wg["actor_rollout"]
         self.actor_rollout_wg.init_model()
 
+        print(f"===== finished init workers =====", flush=True)
+
         # create async rollout manager and request scheduler
         self.async_rollout_mode = False
         if self.config.actor_rollout_ref.rollout.mode == "async":
@@ -881,7 +882,7 @@ def init_workers(self):
                 worker_group=self.actor_rollout_wg,
             )
 
-    def _save_checkpoint(self):
+    def _save_checkpoint(self, worker):
         from verl.utils.fs import local_mkdir_safe
 
         # path: given_path + `/global_step_{global_steps}` + `/actor`
@@ -911,7 +912,7 @@ def _save_checkpoint(self):
             self.config.trainer.get("max_critic_ckpt_to_keep", None) if not remove_previous_ckpt_in_save else 1
         )
 
-        self.actor_rollout_wg.save_checkpoint(
+        worker.save_checkpoint(
             actor_local_path, actor_remote_path, self.global_steps, max_ckpt_to_keep=max_actor_ckpt_to_keep
         )
 
@@ -1061,6 +1062,7 @@ def fit(self):
         # perform validation before training
         # currently, we only support validation using the reward_function.
         if self.val_reward_fn is not None and self.config.trainer.get("val_before_train", True):
+            print(f"===== validation before training =====", flush=True)
             val_metrics = self._validate()
             assert val_metrics, f"{val_metrics=}"
             pprint(f"Initial validation metrics: {val_metrics}")
@@ -1336,7 +1338,8 @@ def fit(self):
                         if esi_close_to_expiration:
                             print("Force saving checkpoint: ESI instance expiration approaching.")
                         with marked_timer("save_checkpoint", timing_raw, color="green"):
-                            self._save_checkpoint()
+                            worker = self.actor_rollout_wg
+                            self._save_checkpoint(worker)
 
                 with marked_timer("stop_profile", timing_raw):
                     next_step_profile = (
diff --git a/verl/utils/checkpoint/megatron_checkpoint_manager.py b/verl/utils/checkpoint/megatron_checkpoint_manager.py
index f0071b8ca3c..3a3249176b7 100644
--- a/verl/utils/checkpoint/megatron_checkpoint_manager.py
+++ b/verl/utils/checkpoint/megatron_checkpoint_manager.py
@@ -18,13 +18,14 @@
 import random
 from collections.abc import Callable
 from dataclasses import asdict
+from dataclasses import is_dataclass
+from argparse import Namespace
 
 import numpy as np
 import torch
 import torch.distributed
 from megatron.core import mpu, tensor_parallel
 from megatron.core.dist_checkpointing.mapping import ShardedObject
-from megatron.core.transformer.enums import AttnBackend
 from transformers import GenerationConfig
 
 from verl.models.weight_loader_registry import get_weight_saver
@@ -101,6 +102,7 @@ class MegatronCheckpointManager(BaseCheckpointManager):
 
     def __init__(
         self,
+        tf_config,
         config,
         checkpoint_config,
         model_config,
@@ -128,6 +130,7 @@ def __init__(
             checkpoint_config=checkpoint_config,
         )
         self.arch = arch
+        self.tf_config = tf_config
         self.config = config
         self.transformer_config = transformer_config
         self.role = role
@@ -436,6 +439,7 @@ def save_checkpoint(self, local_path: str, hdfs_path: str = None, global_step: i
                 # Save transformer config
                 print(self.transformer_config)
                 transformer_config_dict = asdict(self.transformer_config)
+                from megatron.core.transformer.enums import AttnBackend
                 to_convert_types = {torch.dtype: str, AttnBackend: str}
                 ignore_types = [Callable]
                 pop_keys = []
diff --git a/verl/utils/megatron_utils.py b/verl/utils/megatron_utils.py
index a10908c7d99..db22bc85606 100644
--- a/verl/utils/megatron_utils.py
+++ b/verl/utils/megatron_utils.py
@@ -21,7 +21,9 @@
 import os
 import warnings
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Dict
+import socket
+from datetime import datetime, timedelta
 
 import torch
 import torch.nn.functional as F
@@ -42,6 +44,38 @@
 from verl.utils.torch_dtypes import PrecisionType
 
 
+TIME_FORMAT_STR: str = "%b_%d_%H_%M_%S"
+
+
+def start_record_memory_history() -> None:
+    """Turn on CUDA allocator history recording (max_entries=100000); does nothing without CUDA."""
+    if not torch.cuda.is_available():
+        print("CUDA unavailable. Not recording memory history")
+        return
+
+    print("Starting snapshot record_memory_history")
+    # Underscore-prefixed (private) PyTorch API.
+    torch.cuda.memory._record_memory_history(max_entries=100000)
+
+def stop_record_memory_history() -> None:
+    """Dump the recorded CUDA memory snapshot to a local pickle, then stop recording; does nothing without CUDA."""
+    if not torch.cuda.is_available():
+        print("CUDA unavailable. Not recording memory history")
+        return
+
+    # Several worker processes usually share a host, so the PID keeps snapshot names unique per process.
+    host_name = socket.gethostname()
+    timestamp = datetime.now().strftime(TIME_FORMAT_STR)
+    file_prefix = f"{host_name}_{os.getpid()}_{timestamp}"
+
+    try:
+        print(f"Saving snapshot to local file: {file_prefix}.pickle")
+        torch.cuda.memory._dump_snapshot(f"{file_prefix}.pickle")
+    except Exception as e:  # best effort: report the failure and still stop recording below
+        print(f"Failed to capture memory snapshot {e}")
+    torch.cuda.memory._record_memory_history(enabled=None)
+
+
 def get_model_config(model):
     return get_attr_wrapped_model(model, "config", allow_none=False)
 
@@ -381,6 +415,50 @@ def load_megatron_model_to_gpu(models, load_grad=True):
     get_torch_device().empty_cache()
 
 
+@torch.no_grad()
+def offload_megatron_model_grad_to_cpu(models):
+    """
+    Release the gradient memory of the given Megatron model chunks.
+    - DDP chunk: for every buffer in `buffers` / `expert_parallel_buffers`, remember the grad storage
+      size in `buffer.grad_data_size` and shrink the storage to 0 (the values are not copied anywhere).
+    - any other chunk (e.g. the ref module): move each non-None `param.grad` to CPU.
+    Finishes with `gc.collect()` and an `empty_cache()` on the torch device.
+    """
+    for model_chunk in models:
+        if isinstance(model_chunk, DDP):
+            model_chunk_all_buffers = [model_chunk.buffers, model_chunk.expert_parallel_buffers]
+            for buffers in model_chunk_all_buffers:
+                for buffer in buffers:
+                    if buffer.grad_data.storage().size() > 0:
+                        # a storage size of zero is taken to mean this buffer was already offloaded
+                        buffer.grad_data_size = buffer.grad_data.storage().size()
+                        buffer.grad_data.storage().resize_(0)
+        else:
+            # non-DDP chunks (needed for the ref module): grads live on the parameters themselves
+            for _, param in model_chunk.named_parameters():
+                if param.grad is not None:
+                    param.grad = param.grad.to("cpu", non_blocking=True)
+    gc.collect()
+    get_torch_device().empty_cache()
+
+@torch.no_grad()
+def load_megatron_model_grad_to_gpu(models):
+    for model_chunk in models:
+        if isinstance(model_chunk, DDP):
+            model_chunk_all_buffers = [model_chunk.buffers, model_chunk.expert_parallel_buffers]
+            for buffers in model_chunk_all_buffers:
+                for buffer in buffers:
+                    buffer.grad_data.storage().resize_(buffer.grad_data_size)  # needs grad_data_size to have been set on the buffer
+                    buffer.grad_data.zero_()  # re-grown storage is zero-filled before use
+        else:
+            # non-DDP chunks (needed for the ref module): move existing grads to the current device
+            device_id = get_device_id()
+            for _, param in model_chunk.named_parameters():
+                if param.grad is not None:
+                    param.grad = param.grad.to(device_id, non_blocking=True)
+    gc.collect()
+    get_torch_device().empty_cache()
+
 @torch.no_grad()
 def offload_megatron_copy_params(optimizers):
     """
@@ -519,7 +597,33 @@ def get_hf_model_checkpoint_path(checkpoint_path):
 
 def get_transformer_config_checkpoint_path(checkpoint_path):
     os.makedirs(checkpoint_path, exist_ok=True)
-    return os.path.join(checkpoint_path, "transformer_config.json")
+    # TODO: use a more standard name?
+    # return os.path.join(checkpoint_path, "transformer_config.json")
+    return os.path.join(checkpoint_path, "hf_config_and_tokenizer")
+
+
+def get_optimizer_checkpoint_path(checkpoint_path, use_distributed_optimizer=True):
+    """Create `<checkpoint_path>/optim` and return the optimizer-state file path inside it."""
+    optim_dir = os.path.join(checkpoint_path, "optim")
+    os.makedirs(optim_dir, exist_ok=True)
+    if not use_distributed_optimizer:
+        return os.path.join(optim_dir, "optim.pt")
+    # Distributed optimizer: the file name carries the pp/tp/ep ranks (cp and dp ranks are not included).
+    pp_rank, tp_rank = mpu.get_pipeline_model_parallel_rank(), mpu.get_tensor_model_parallel_rank()
+    ep_rank = mpu.get_expert_model_parallel_rank()
+    return os.path.join(optim_dir, f"distrib_optim_pp{pp_rank}_tp{tp_rank}_ep{ep_rank}.pt")
+
+
+def get_rng_states_checkpoint_path(checkpoint_path, only_rank0_save=True):
+    """Create `<checkpoint_path>/rng_states` and return the RNG-state file path inside it."""
+    rng_dir = os.path.join(checkpoint_path, "rng_states")
+    os.makedirs(rng_dir, exist_ok=True)
+    if only_rank0_save:
+        return os.path.join(rng_dir, "rng_states.pt")
+    # Per-rank save: the file name carries the pp/tp/cp/dp ranks.
+    dp_rank, pp_rank = mpu.get_data_parallel_rank(), mpu.get_pipeline_model_parallel_rank()
+    tp_rank, cp_rank = mpu.get_tensor_model_parallel_rank(), mpu.get_context_parallel_rank()
+    return os.path.join(rng_dir, f"rng_states_pp{pp_rank}_tp{tp_rank}_cp{cp_rank}_dp{dp_rank}.pt")
 
 
 def convert_megatron_model_to_transformers_model(
@@ -807,14 +911,11 @@ def default_tp_concat_fn(
     return infer_params
 
 
-def per_tensor_generator(
-    actor_module,
-    model_config,
-    weight_converter,
-    transformer_config,
-    layer_name_mapping,
-    convert_qkv_gate_up_by_simple_split=True,
-):
+def per_tensor_generator_bucketed(actor_module, model_config, weight_converter, transformer_config, layer_name_mapping, convert_qkv_gate_up_by_simple_split=True, target_device=None, bucket_size_mb=100):
+    """
+    per_tensor_generator with bucket size, return bucket-level iterator
+    each bucket contains multiple tensors, reduce distributed communication times
+    """
     from megatron.core import parallel_state as mpu
 
     pp_rank = mpu.get_pipeline_model_parallel_rank()
@@ -826,6 +927,198 @@ def per_tensor_generator(
     all_gather_group = mpu.get_tensor_model_parallel_group()
     all_gather_group_size = torch.distributed.get_world_size(group=all_gather_group)
 
+    if target_device is None:
+        target_device = torch.cuda.current_device()
+    else:
+        target_device = torch.device(target_device)
+
+    def to_device(tensor):
+        if tensor is None:
+            return None
+        if isinstance(tensor, list):
+            return [t.to(target_device) for t in tensor]
+        return tensor.to(target_device)
+
+    def tensor_generator():
+        for scan_vpp_idx in range(vpp_size):
+            existing_keys = set()
+            model = unwrap_model(actor_module[scan_vpp_idx])
+            for name, param in model.named_parameters():
+                existing_keys.add(name)
+                yield name, param
+            extra_keys = [x for x in model.state_dict().keys() if "_extra_state" not in x and x not in existing_keys]
+            for name in extra_keys:
+                yield name, model.state_dict()[name].to(torch.cuda.current_device())
+
+    def calculate_tensor_size(tensor):
+        """calculate tensor size (bytes)"""
+        if tensor is None:
+            return 0
+        return tensor.numel() * tensor.element_size()
+
+    # step 1: collect all tensor meta info
+    meta_info = []
+    for scan_vpp_idx in range(vpp_size):
+        existing_keys = set()
+        model = unwrap_model(actor_module[scan_vpp_idx])
+        for idx, (name, _) in enumerate(model.named_parameters()):
+            existing_keys.add(name)
+            meta_info.append((pp_rank, scan_vpp_idx, idx, name))
+        extra_keys = [x for x in model.state_dict().keys() if "_extra_state" not in x and x not in existing_keys]
+        for name in extra_keys:
+            meta_info.append((pp_rank, scan_vpp_idx, idx, name))
+
+    obj_spec_output = [None] * mpu.get_pipeline_model_parallel_world_size()
+    torch.distributed.all_gather_object(object_list=obj_spec_output, obj=meta_info, group=mpu.get_pipeline_model_parallel_group())
+    layer_list_meta = [item for sublist in obj_spec_output for item in sublist]
+
+    # step 2: group tensors by bucket
+    bucket_size_bytes = bucket_size_mb * 1024 * 1024  # convert to bytes
+    buckets = []
+    current_bucket = []
+    current_bucket_size = 0
+    
+    gen_func = tensor_generator()
+    
+    for cur_pp_rank, scan_vpp_idx, idx, name in layer_list_meta:
+        if model_config.tie_word_embeddings and ("output_layers" in name):
+            import warnings
+
+            warnings.warn("Current model sharing word and embedding weights, skip output layer conversion", stacklevel=2)
+            continue
+
+        if cur_pp_rank == pp_rank:
+            try:
+                cur_name, cur_tensor = next(gen_func)
+            except StopIteration:
+                cur_name, cur_tensor = None, None
+            cur_name = normalize_model_name(name, cur_pp_rank, scan_vpp_idx, transformer_config)
+        else:
+            cur_tensor, cur_name = None, None
+
+        # fix parameter name
+        while cur_name.startswith("module."):
+            cur_name = cur_name[len("module.") :]
+
+        # calculate tensor size
+        tensor_size = calculate_tensor_size(cur_tensor)
+        
+        # if current bucket is already large enough, or adding this tensor will exceed bucket size, create new bucket
+        if current_bucket_size + tensor_size > bucket_size_bytes and current_bucket:
+            buckets.append(current_bucket)
+            current_bucket = []
+            current_bucket_size = 0
+        
+        current_bucket.append((cur_pp_rank, scan_vpp_idx, idx, name, cur_name, cur_tensor))
+        current_bucket_size += tensor_size
+
+    # add last bucket
+    if current_bucket:
+        buckets.append(current_bucket)
+
+    # step 3: batch broadcast tensors by bucket, return bucket-level iterator
+    for bucket_idx, bucket in enumerate(buckets):
+        # batch broadcast all tensors in bucket
+        bucket_names = []
+        bucket_tensors = []
+        
+        for cur_pp_rank, scan_vpp_idx, idx, name, cur_name, cur_tensor in bucket:
+            bucket_names.append(cur_name)
+            bucket_tensors.append(cur_tensor)
+        
+        # batch broadcast names
+        bucket_names_broadcasted = broadcast_str_from_megatron_pp(bucket_names)
+        
+        # batch broadcast tensors (need special handling because tensors may be on different ranks)
+        bucket_tensors_broadcasted = []
+        
+        for i, (cur_pp_rank, scan_vpp_idx, idx, name, cur_name, cur_tensor) in enumerate(bucket):
+            if cur_tensor is not None:
+                # if tensor is on current rank, broadcast to other ranks
+                broadcasted_tensor = broadcast_from_megatron_pp(cur_tensor)
+            else:
+                # if tensor is not on current rank, receive broadcast
+                broadcasted_tensor = broadcast_from_megatron_pp(None)
+            
+            bucket_tensors_broadcasted.append(broadcasted_tensor)
+
+        # process all tensors in current bucket, including EP and TP logic
+        for name, broad_pp_tensor in zip(bucket_names_broadcasted, bucket_tensors_broadcasted):
+            if broad_pp_tensor is None:
+                continue
+                
+            # EP (Expert Parallel) logic
+            if ".mlp.experts.linear_fc" in name and ep_size > 1:
+                num_experts = weight_converter.mcore_config.num_moe_experts
+                num_experts_per_rank = num_experts // ep_size
+                infer_params = [torch.empty_like(broad_pp_tensor) for _ in range(ep_size)]
+                torch.distributed.all_gather(infer_params, broad_pp_tensor, group=ep_group)
+
+                name_prefix, local_expert_id = name.split(".weight")
+                local_expert_id = int(local_expert_id)
+                global_expert_ids = [num_experts_per_rank * ep_rank + local_expert_id for ep_rank in range(ep_size)]
+                global_expert_names = [f"{name_prefix}.weight{expert_id}" for expert_id in global_expert_ids]
+
+                for expert_name, param in zip(global_expert_names, infer_params):
+                    if etp_size > 1:
+                        # gather etp
+                        etp_params = [torch.empty_like(param) for _ in range(etp_size)]
+                        torch.distributed.all_gather(etp_params, param, group=etp_group)
+                        params = etp_params
+                    else:
+                        params = [param]
+
+                    merge_params = default_tp_concat_fn(layer_name_mapping, expert_name, broad_pp_tensor, params, model_config, weight_converter.hf_config, convert_qkv_gate_up_by_simple_split)
+                    if not isinstance(merge_params, list):
+                        merge_params = [merge_params]
+                    converted_names, converted_params = weight_converter.convert_param(expert_name, merge_params)
+
+                    yield from zip(converted_names, to_device(converted_params))
+                continue
+
+            # TP (Tensor Parallel) logic
+            if tp_utils.is_tensor_parallel_param(broad_pp_tensor):
+                # allocate a new tensor with proper size
+                if all_gather_group_size <= 1:
+                    infer_params = [broad_pp_tensor]
+                else:
+                    infer_params = [torch.empty_like(broad_pp_tensor) for _ in range(all_gather_group_size)]
+                    torch.distributed.all_gather(infer_params, broad_pp_tensor, group=mpu.get_tensor_model_parallel_group())
+                infer_params = default_tp_concat_fn(layer_name_mapping, name, broad_pp_tensor, infer_params, model_config, weight_converter.hf_config, convert_qkv_gate_up_by_simple_split)
+            else:
+                infer_params = broad_pp_tensor
+
+            if not isinstance(infer_params, list):
+                infer_params = [infer_params]
+            converted_names, converted_params = weight_converter.convert_param(name, infer_params)
+
+            yield from zip(converted_names, to_device(converted_params))
+
+
+def per_tensor_generator(actor_module, model_config, weight_converter, transformer_config, layer_name_mapping, convert_qkv_gate_up_by_simple_split=True, target_device=None):
+    from megatron.core import parallel_state as mpu
+
+    pp_rank = mpu.get_pipeline_model_parallel_rank()
+    ep_size = mpu.get_expert_model_parallel_world_size()
+    etp_size = mpu.get_expert_tensor_parallel_world_size()
+    ep_group = mpu.get_expert_model_parallel_group()
+    etp_group = mpu.get_expert_tensor_parallel_group()
+    vpp_size = len(actor_module)
+    all_gather_group = mpu.get_tensor_model_parallel_group()
+    all_gather_group_size = torch.distributed.get_world_size(group=all_gather_group)
+
+    if target_device is None:
+        target_device = torch.cuda.current_device()
+    else:
+        target_device = torch.device(target_device)
+    
+    def to_device(tensor):
+        if tensor is None:
+            return None
+        if isinstance(tensor, list):
+            return [t.to(target_device) for t in tensor]
+        return tensor.to(target_device)
+
     def tensor_generator():
         for scan_vpp_idx in range(vpp_size):
             existing_keys = set()
@@ -979,10 +1272,7 @@ def get_transformer_layer_offset(pipeline_rank, vp_stage, config: TransformerCon
             offset = config.pipeline_model_parallel_layout.get_layer_offset(
                 layer_type=LayerType.decoder, vp_stage=vp_stage
             )
-        elif (
-            config.num_layers_in_first_pipeline_stage is not None
-            or config.num_layers_in_last_pipeline_stage is not None
-        ):
+        elif config.first_pipeline_num_layers is not None or config.last_pipeline_num_layers is not None:
             # Calculate number of pipeline stages to distribute the remaining Transformer
             # layers after deducting the Transformer layers in the first or the last stages
             middle_pipeline_stages = config.pipeline_model_parallel_size
@@ -990,8 +1280,8 @@ def get_transformer_layer_offset(pipeline_rank, vp_stage, config: TransformerCon
                 [
                     1 if x is not None else 0
                     for x in (
-                        config.num_layers_in_first_pipeline_stage,
-                        config.num_layers_in_last_pipeline_stage,
+                        config.first_pipeline_num_layers,
+                        config.last_pipeline_num_layers,
                     )
                 ]
             )
@@ -1000,16 +1290,10 @@ def get_transformer_layer_offset(pipeline_rank, vp_stage, config: TransformerCon
             # num_layers_in_first_pipeline_stage and num_layers_in_last_pipeline_stage
             # are not set, we will not enable uneven pipeline. All layers will be treated
             # as middle layers.
-            num_layers_in_first_pipeline_stage = (
-                0 if config.num_layers_in_first_pipeline_stage is None else config.num_layers_in_first_pipeline_stage
-            )
-            num_layers_in_last_pipeline_stage = (
-                0 if config.num_layers_in_last_pipeline_stage is None else config.num_layers_in_last_pipeline_stage
-            )
+            first_pipeline_num_layers = 0 if config.first_pipeline_num_layers is None else config.first_pipeline_num_layers
+            last_pipeline_num_layers = 0 if config.last_pipeline_num_layers is None else config.last_pipeline_num_layers
 
-            middle_num_layers = (
-                config.num_layers - num_layers_in_first_pipeline_stage - num_layers_in_last_pipeline_stage
-            )
+            middle_num_layers = config.num_layers - first_pipeline_num_layers - last_pipeline_num_layers
 
             if (vp_size := config.virtual_pipeline_model_parallel_size) is not None:
                 assert vp_stage is not None, "vp_stage must be provided if virtual pipeline model parallel size is set"
@@ -1018,17 +1302,9 @@ def get_transformer_layer_offset(pipeline_rank, vp_stage, config: TransformerCon
                 # If the num_layers_in_first_pipeline_stage and
                 # num_layers_in_last_pipeline_stage are not set, all pipeline stages
                 # will be treated as middle pipeline stages in the calculation
-                num_layers_per_virtual_model_chunk_in_first_pipeline_stage = (
-                    0
-                    if config.num_layers_in_first_pipeline_stage is None
-                    else config.num_layers_in_first_pipeline_stage // vp_size
-                )
+                num_layers_per_virtual_model_chunk_in_first_pipeline_stage = 0 if config.first_pipeline_num_layers is None else config.first_pipeline_num_layers // vp_size
 
-                num_layers_per_virtual_model_chunk_in_last_pipeline_stage = (
-                    0
-                    if config.num_layers_in_last_pipeline_stage is None
-                    else config.num_layers_in_last_pipeline_stage // vp_size
-                )
+                num_layers_per_virtual_model_chunk_in_last_pipeline_stage = 0 if config.last_pipeline_num_layers is None else config.last_pipeline_num_layers // vp_size
 
                 num_layers_per_vritual_model_chunk_in_middle_pipeline_stage = middle_num_layers // vp_size
 
@@ -1055,25 +1331,15 @@ def get_transformer_layer_offset(pipeline_rank, vp_stage, config: TransformerCon
                 else:
                     num_layers_per_pipeline_rank = 0
 
-                middle_pipeline_rank = (
-                    pipeline_rank if config.num_layers_in_first_pipeline_stage is None else pipeline_rank - 1
-                )
+                middle_pipeline_rank = pipeline_rank if config.first_pipeline_num_layers is None else pipeline_rank - 1
 
                 if pipeline_rank == 0:
                     offset = 0
                 else:
-                    offset = (middle_pipeline_rank * num_layers_per_pipeline_rank) + num_layers_in_first_pipeline_stage
+                    offset = (middle_pipeline_rank * num_layers_per_pipeline_rank) + first_pipeline_num_layers
         else:
             num_layers = config.num_layers
 
-            # Increase the number of layers by one if we include the embedding (loss)
-            # layer into pipeline parallelism partition and placement
-            if config.account_for_embedding_in_pipeline_split:
-                num_layers += 1
-
-            if config.account_for_loss_in_pipeline_split:
-                num_layers += 1
-
             num_layers_per_pipeline_rank = num_layers // config.pipeline_model_parallel_size
 
             if (vp_size := config.virtual_pipeline_model_parallel_size) is not None:
diff --git a/verl/utils/model.py b/verl/utils/model.py
index 04cc34fe58b..7aade87acd6 100644
--- a/verl/utils/model.py
+++ b/verl/utils/model.py
@@ -483,22 +483,48 @@ def load_megatron_model_weights(
     return model.config
 
 
-def load_megatron_gptmodel_weights(
-    config, model_config, parallel_model, params_dtype, is_value_model=False, local_cache_path="~/.cache/verl/rlhf"
-):
-    """Load weights for mcore GPT model."""
-    _, model, state_dict, is_value_model = _load_hf_model(config, model_config, is_value_model, local_cache_path)
-
-    from verl.models.mcore.loader import load_state_dict_to_megatron_gptmodel
-
-    load_state_dict_to_megatron_gptmodel(
-        state_dict=state_dict,
-        wrapped_models=parallel_model,
-        config=model.config,
-        params_dtype=params_dtype,
-        is_value_model=is_value_model,
-    )
-    del state_dict, model
+def load_megatron_gptmodel_weights(config, model_config, parallel_model, params_dtype, is_value_model=False, local_cache_path="~/.cache/verl/rlhf"):
+    """Load HF checkpoint weights into every mcore GPT model chunk via ``DeepseekV2HfLoader``.
+
+    One loader is built per local model chunk from this rank's TP/PP/EP coordinates; the state
+    dict it returns is loaded into the unwrapped chunk with ``strict=False``.
+
+    Args:
+        config: Forwarded to ``DeepseekV2HfLoader``.
+        model_config: Forwarded to ``DeepseekV2HfLoader``.
+        parallel_model: Model chunks; passed through ``unwrap_model`` before loading.
+        params_dtype, is_value_model, local_cache_path: Not used by this loader; kept for callers.
+
+    Raises:
+        AssertionError: If a missing key lacks ``_extra_state`` in its name, or any key is unexpected.
+    """
+    from megatron.core import parallel_state
+    from verl.models.mcore.hf_mcore_loader import DeepseekV2HfLoader
+    from verl.utils.megatron_utils import unwrap_model
+
+    model_chunks = unwrap_model(parallel_model)
+    tp_rank = parallel_state.get_tensor_model_parallel_rank()
+    tp_size = parallel_state.get_tensor_model_parallel_world_size()
+    pp_rank = parallel_state.get_pipeline_model_parallel_rank()
+    pp_size = parallel_state.get_pipeline_model_parallel_world_size()
+    ep_rank = parallel_state.get_expert_model_parallel_rank()
+    ep_size = parallel_state.get_expert_model_parallel_world_size()
+    # A virtual pipeline size of None is counted as one chunk per pipeline rank.
+    vp_size = parallel_state.get_virtual_pipeline_model_parallel_world_size()
+    if vp_size is None:
+        vp_size = 1
+    pp_size_with_vp = vp_size * pp_size
+
+    for chunk_idx, model_chunk in enumerate(model_chunks):
+        # Each local chunk is loaded as stage `pp_size * chunk_idx + pp_rank` out of `pp_size_with_vp`.
+        pp_rank_with_vp = pp_size * chunk_idx + pp_rank
+        loader = DeepseekV2HfLoader(config, model_config, tp_size=tp_size, tp_rank=tp_rank, pp_size=pp_size_with_vp, pp_rank=pp_rank_with_vp, ep_size=ep_size, ep_rank=ep_rank)
+        missing_keys, unexpected_keys = model_chunk.load_state_dict(loader.load(), strict=False)
+        # NOTE(review): the message mentions "rm_head", but only "_extra_state" keys are tolerated - confirm intent.
+        unexpected_missing = [key for key in missing_keys if "_extra_state" not in key]
+        assert not unexpected_missing, f'Only missing "_extra_state" or "rm_head" is accepted. But got {unexpected_missing}'
+        assert len(unexpected_keys) == 0, f'Unexpected keys is not accepted: {unexpected_keys}, {model_chunk}'
+    
 
 
 # pad input_ids_rmpad, cu_seqlens and max_seqlen_in_batch to be divisible by tp
diff --git a/verl/utils/reward_score/__init__.py b/verl/utils/reward_score/__init__.py
index 5151c515149..9950b1c4098 100644
--- a/verl/utils/reward_score/__init__.py
+++ b/verl/utils/reward_score/__init__.py
@@ -101,7 +101,15 @@ def default_compute_score(
         from . import search_r1_like_qa_em
 
         res = search_r1_like_qa_em.compute_score(solution_str, ground_truth)
-
+    elif data_source in ['ifeval', 'ifeval_benchmark', 'ifeval_like_3w_v1', 'Nemotron-IFEVAL']:
+        # from .ifeavl import compute_score
+        from .ifeval_online import compute_score
+
+        res = compute_score(solution_str, ground_truth)
+    elif data_source in ['deepscaler']:
+        from .math_verify import compute_score
+        res = compute_score(solution_str, ground_truth, timeout_score=2)
+        
     else:
         raise NotImplementedError(f"Reward function is not implemented for {data_source=}")
 
diff --git a/verl/utils/reward_score/complex_reward/__init__.py b/verl/utils/reward_score/complex_reward/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/verl/utils/reward_score/complex_reward/mathverify_format.py b/verl/utils/reward_score/complex_reward/mathverify_format.py
new file mode 100644
index 00000000000..cba9ac76770
--- /dev/null
+++ b/verl/utils/reward_score/complex_reward/mathverify_format.py
@@ -0,0 +1,12 @@
+from verl.utils.reward_score.math_verify import compute_score as math_verify_compute
+from verl.utils.reward_score.format_reward import format_reward as format_compute 
+
+def compute_score(model_output: str, ground_truth: str, timeout_score: float = 2) -> dict:
+    """Combine math-verify and format scores; returns a dict with 'format_sub', 'math_verify_sub' and 'score'."""
+    math_verify_score = math_verify_compute(model_output, ground_truth, timeout_score)
+    formatted_score = format_compute(model_output)
+    # The format check gates the reward: unless it is exactly 1.0 the total is 0.0, whatever the math score.
+    well_formatted = formatted_score == 1.0
+    total_score = (math_verify_score + formatted_score) / 2 if well_formatted else 0.0
+    return {'format_sub': formatted_score, 'math_verify_sub': math_verify_score, 'score': total_score}
+
diff --git a/verl/utils/reward_score/format_reward.py b/verl/utils/reward_score/format_reward.py
new file mode 100644
index 00000000000..b6d9c9b632d
--- /dev/null
+++ b/verl/utils/reward_score/format_reward.py
@@ -0,0 +1,12 @@
+import re
+
+
+
+def format_reward(solution_str):
+    """Return 1.0 when the whole stripped response is `<think>...</think>...<answer>...</answer>`, else 0.0."""
+    pattern = r"^<think>.*?</think>.*?<answer>.*?</answer>$"
+    # No re.MULTILINE: with it `$` matched at any line end, so text on a new line after </answer> still scored 1.0.
+    return 1.0 if re.match(pattern, solution_str.strip(), re.DOTALL) else 0.0
+
+def format_v1(solution_str):
+    return format_reward(solution_str)  # alias: "v1" simply delegates to format_reward
\ No newline at end of file
diff --git a/verl/utils/reward_score/ifeavl.py b/verl/utils/reward_score/ifeavl.py
new file mode 100644
index 00000000000..c8cc96b1cdf
--- /dev/null
+++ b/verl/utils/reward_score/ifeavl.py
@@ -0,0 +1,139 @@
+import numpy as np
+from lm_eval.tasks.ifeval.utils import process_results
+from tqdm import tqdm
+import json
+import pandas as pd
+import fire
+
+def convert_arrays_and_floats_to_simple_types(obj):
+    """
+    Recursively rebuild ``obj`` out of plain Python values:
+    1. NumPy arrays, lists and tuples all become lists of converted items
+    2. dict values are converted, keys are kept as they are
+    3. floats whose value is a whole number become ints
+    Anything else is returned unchanged.
+    """
+    convert = convert_arrays_and_floats_to_simple_types
+
+    # ndarray / list / tuple: every sequence flavour collapses to a list
+    if isinstance(obj, (np.ndarray, list, tuple)):
+        return [convert(element) for element in obj]
+    # dict: recurse into the values only
+    if isinstance(obj, dict):
+        return {name: convert(entry) for name, entry in obj.items()}
+    # integral float such as 3.0 -> 3
+    if isinstance(obj, float) and obj.is_integer():
+        return int(obj)
+    # everything else passes through untouched
+    return obj
+
+
+# Qwen-2.5-Instruct
+def split_solution_qwen(solution_str):
+    """Split a `<|im_start|>`/`<|im_end|>` chat transcript into (user prompt, assistant response), both stripped."""
+    after_user = solution_str.split("<|im_start|>user")[1]
+    prompt_part, reply_part = after_user.split("<|im_start|>assistant")
+    return prompt_part.split('<|im_end|>')[0].strip(), reply_part.split("<|im_end|>")[0].strip()
+
+# Qwen-2.5-R1-Distilled
+
+
+def split_solution_qwen_distilled(solution_str):
+    """Split on `<｜Assistant｜><think>`: return (text after the User marker, text up to the end-of-sentence marker), both stripped."""
+    pieces = solution_str.split('<｜Assistant｜><think>')
+    return pieces[0].split('<｜begin▁of▁sentence｜><｜User｜>')[1].strip(), pieces[1].split('<｜end▁of▁sentence｜>')[0].strip()
+
+
+def split_r1_or_qwen(solution_str):
+    """Split a `User: ... Assistant: ...` transcript into (user prompt, response with the reasoning part removed)."""
+    if 'Assistant: <think>' in solution_str:
+        user_prompt = solution_str.split('Assistant: <think>')[0].split('User: ')[1].strip()
+        new_response = solution_str.split('Assistant: <think>')[1].split('<｜end▁of▁sentence｜>')[0].strip()
+        # Drop the reasoning that precedes the closing </think> tag.
+        if '</think>' in new_response:
+            new_response = new_response.split('</think>')[1].strip()
+        return user_prompt, new_response
+    # Fixed: `.strip()` used to be called on the list returned by split() (AttributeError); split once so later "Assistant: " text stays in the response.
+    user_prompt, response = solution_str.split("User: ")[1].split("Assistant: ", 1)
+    user_prompt = user_prompt.split('<|im_end|>')[0].strip()
+    response = response.split("<｜end▁of▁sentence｜>")[0].strip()
+    # Drop the reasoning that precedes the closing </think> tag.
+    if '</think>' in response:
+        response = response.split('</think>')[1].strip()
+    return user_prompt, response
+
+
+def compute_score(
+    solution_str,
+    ground_truth,
+):
+    """Return 1. when `process_results` reports a truthy `prompt_level_strict_acc`, else 0.; any exception is printed and scored 0."""
+    try:
+        spec = json.loads(ground_truth) if isinstance(ground_truth, str) else ground_truth
+        # The prompt is left empty and the full solution string is scored as the response.
+        item = {
+            "key": None,
+            "instruction_id_list": convert_arrays_and_floats_to_simple_types(spec["instruction_id_list"]),
+            "kwargs": convert_arrays_and_floats_to_simple_types(spec["kwargs"]),
+            "prompt": "",
+        }
+        strict_ok = process_results(item, [solution_str])['prompt_level_strict_acc']
+        return 1. if strict_ok else 0.
+    except Exception as err:
+        print(f'IFEval Reward, [ERROR] Compute Score Error: {err}')
+        return 0.
+
+
+
+def debug(test_file : str = "/cpfs/user/liuyanjiang/Eng/unitest/offline_single_res.jsonl"):
+    """Offline check: load ground truths from a hard-coded parquet file, score the responses in `test_file`, and print the strict prompt-level accuracy and the number of distinct prompts."""
+    df = pd.read_parquet(
+        "/cpfs/user/lizichao/RedMOE-verl-new/dataset/test_ifeval_benchmark.parquet", 
+    )
+    content = [{col: row[col] for col in df.columns} for _, row in df.iterrows()]
+    # Map item['prompt'][1]['content'] of every parquet row to its converted ground truth.
+    prompt_to_gt = dict()
+    for item in tqdm(content):
+        prompt_to_gt[item['prompt'][1]['content']] = convert_arrays_and_floats_to_simple_types(item['reward_model']['ground_truth'])
+    # `content` is now replaced by the records of the JSON-lines file under test, one per line.
+    with open(test_file, 'r') as f:
+        content = [json.loads(line) for line in f]
+
+    print('read done')
+    # NOTE: the triple-quoted string below is never executed; it holds an earlier variant of the scoring loop.
+    '''
+    acc = 0
+    processed_prompts = set()
+    for item in tqdm(content):
+        if item['prompt'][1]['content'] in processed_prompts:
+            continue
+        gt = prompt_to_gt[item['prompt'][1]['content']]
+        x = {"key": None, "instruction_id_list": gt["instruction_id_list"],
+                 "kwargs": gt["kwargs"], "prompt": item['prompt'][1]['content']}
+        ifeval_results = process_results(x, item['rollout'])
+        if ifeval_results['prompt_level_strict_acc']:
+            acc += 1
+        processed_prompts.add(item['prompt'][1]['content'])
+
+    print(acc / len(processed_prompts))
+    '''
+    acc = 0
+    processed_prompts = set()
+    for item in tqdm(content):
+        prompt = item['prompt'].split('<|userprompt|>')[1].split('<|endofuserprompt|>')[0]
+        if prompt in processed_prompts:
+            continue  # each distinct prompt is scored once
+        gt = prompt_to_gt[prompt]
+        x = {"key": None, "instruction_id_list": gt["instruction_id_list"],
+                 "kwargs": gt["kwargs"], "prompt": prompt}
+        ifeval_results = process_results(x, [item['resp']])
+        if ifeval_results['prompt_level_strict_acc']:
+            acc += 1
+        processed_prompts.add(prompt)
+    # NOTE(review): the division below raises ZeroDivisionError when no prompt was processed.
+    print(acc / len(processed_prompts))
+    print(len(processed_prompts))
+
+
+if __name__ == "__main__":
+    fire.Fire(debug)
\ No newline at end of file
diff --git a/verl/utils/reward_score/ifeval_online.py b/verl/utils/reward_score/ifeval_online.py
new file mode 100644
index 00000000000..949f62a33cd
--- /dev/null
+++ b/verl/utils/reward_score/ifeval_online.py
@@ -0,0 +1,85 @@
+import numpy as np
+from lm_eval.tasks.ifeval.utils import process_results
+from tqdm import tqdm
+import json
+import pandas as pd
+
+def convert_arrays_and_floats_to_simple_types(obj):
+    """
+    Walk ``obj`` recursively and return an equivalent made of simple types:
+    1. every NumPy array (and every list or tuple) is turned into a plain Python list
+    2. every float that represents an integer is turned into an int
+    Dict values are processed recursively; other values are returned as they are.
+    """
+    recurse = convert_arrays_and_floats_to_simple_types
+
+    # Mapping: keep the keys, convert each value.
+    if isinstance(obj, dict):
+        return {label: recurse(inner) for label, inner in obj.items()}
+    # Sequence (NumPy array, list or tuple): convert each element into a new list.
+    if isinstance(obj, (list, tuple, np.ndarray)):
+        return [recurse(member) for member in obj]
+    # Float with an integral value: narrow to int.
+    whole_float = isinstance(obj, float) and obj.is_integer()
+    if whole_float:
+        return int(obj)
+    # Any other type is handed back unchanged.
+    return obj
+
+
+# Qwen-2.5-Instruct
+def split_solution_qwen(solution_str):
+    """Return (user prompt, assistant reply) taken from a `<|im_start|>`/`<|im_end|>` chat transcript, both stripped."""
+    user_turn, assistant_turn = solution_str.split("<|im_start|>user")[1].split("<|im_start|>assistant")
+    # Each turn ends at its first <|im_end|> marker.
+    return user_turn.split('<|im_end|>')[0].strip(), assistant_turn.split("<|im_end|>")[0].strip()
+
+# Qwen-2.5-R1-Distilled
+
+
+def split_solution_qwen_distilled(solution_str):
+    """Return (text after the User marker, text up to the end-of-sentence marker) around `<｜Assistant｜><think>`, both stripped."""
+    segments = solution_str.split('<｜Assistant｜><think>')
+    return segments[0].split('<｜begin▁of▁sentence｜><｜User｜>')[1].strip(), segments[1].split('<｜end▁of▁sentence｜>')[0].strip()
+
+
+def split_r1_or_qwen(solution_str):
+    """Split a `<|userprompt|>..<|endofuserprompt|><|response|>..<|endofresponse|>` string into (user prompt, response without reasoning)."""
+    sep = "<|endofuserprompt|><|response|>"
+    user_prompt, response = "", solution_str
+    # Fixed: unpack the prompt only when the full separator follows `<|userprompt|>`, cutting once per marker; a lone `<|endofuserprompt|>` or a repeated marker used to raise ValueError.
+    if sep in solution_str and "<|userprompt|>" in solution_str.split(sep, 1)[0]:
+        user_prompt, response = solution_str.split("<|userprompt|>", 1)[1].split(sep, 1)
+        response = response.split("<|endofresponse|>")[0].strip()
+    elif sep in solution_str:
+        response = solution_str.split(sep)[1]
+        if "<|endofresponse|>" in response:
+            response = response.split("<|endofresponse|>")[0].strip()
+    elif "<|endofresponse|>" in solution_str:
+        response = solution_str.split("<|endofresponse|>")[0].strip()
+    # Drop the reasoning that precedes the closing </think> tag.
+    if '</think>' in response:
+        response = response.split('</think>')[1].strip()
+    return user_prompt, response
+
+
+def compute_score(
+    solution_str,
+    ground_truth,
+):
+    """Score one sample: 1. when `process_results` reports a truthy `prompt_level_strict_acc`, else 0.
+
+    `ground_truth` may be a JSON string or an already-parsed mapping holding
+    `instruction_id_list` and `kwargs`. Exceptions are not caught here and
+    propagate to the caller.
+    """
+    spec = json.loads(ground_truth) if isinstance(ground_truth, str) else ground_truth
+    prompt, response = split_r1_or_qwen(solution_str)
+    item = {
+        "key": None,
+        "instruction_id_list": convert_arrays_and_floats_to_simple_types(spec["instruction_id_list"]),
+        "kwargs": convert_arrays_and_floats_to_simple_types(spec["kwargs"]),
+        "prompt": prompt,
+    }
+    strict_ok = process_results(item, [response])['prompt_level_strict_acc']
+    return 1. if strict_ok else 0.
diff --git a/verl/utils/ulysses.py b/verl/utils/ulysses.py
index 1669f6f32f9..63339d49a9e 100644
--- a/verl/utils/ulysses.py
+++ b/verl/utils/ulysses.py
@@ -323,6 +323,21 @@ def ulysses_pad_and_slice_inputs(
 
 def validate_ulysses_config(num_heads, ulysses_sequence_size):
     if ulysses_sequence_size > 1:
-        assert num_heads % ulysses_sequence_size == 0, (
-            f"num_heads ({num_heads}) must be divisible by ulysses sequence size({ulysses_sequence_size})"
-        )
+        assert num_heads % ulysses_sequence_size == 0, f"num_heads ({num_heads}) must be divisible by ulysses sequence size({ulysses_sequence_size})"
+
+
+def gather_position_ids_seq_dim(position_ids: torch.Tensor, group: Optional[dist.ProcessGroup] = None):
+    """
+    Collect every rank's position_ids chunk from the sequence parallel group and concatenate them along dim 1.
+
+    Assumes position_ids shape: [bsz, seq_chunk_per_rank]
+    Returns: [bsz, full_seq_len] (the input itself when the group is falsy)
+    """
+    if group is None:
+        group = get_ulysses_sequence_parallel_group()
+    if not group:
+        return position_ids
+    # Chunks are exchanged as CPU tensors via all_gather_object, then moved back to the input's device.
+    chunks = [None] * dist.get_world_size(group)
+    dist.all_gather_object(chunks, position_ids.cpu(), group=group)
+    return torch.cat([chunk.to(position_ids.device) for chunk in chunks], dim=1)
diff --git a/verl/workers/actor/megatron_actor.py b/verl/workers/actor/megatron_actor.py
index 5144857769d..0ac6da766e4 100644
--- a/verl/workers/actor/megatron_actor.py
+++ b/verl/workers/actor/megatron_actor.py
@@ -49,6 +49,7 @@
 from verl.utils.torch_functional import broadcast_dict_tensor
 from verl.workers.actor import BasePPOActor
 
+
 __all__ = ["MegatronPPOActor"]
 
 logger = logging.getLogger(__file__)
@@ -64,6 +65,7 @@ def __init__(
         tf_config,
         actor_module: nn.ModuleList,
         actor_optimizer: DistributedOptimizer,
+        tokenizer,
     ):
         """MeagtronPPOActor class. This class implements the simple PPO logics when the model is built with Megatron.
 
@@ -122,7 +124,7 @@ def __init__(
         self.tf_config = tf_config
         self.actor_module = actor_module
         self.actor_optimizer: DistributedOptimizer = actor_optimizer
-        self.use_torch_profiler = self.config.profiler.get("tool") == "torch"
+        self.use_torch_profiler = hasattr(self.config, "profiler") and self.config.profiler.get("tool") == "torch"
         if self.use_torch_profiler:
             self.prof = Profiler(
                 self.config.profiler, tool_config=self.config.profiler.get("tool_config", {}).get("torch", {})
@@ -149,6 +151,7 @@ def __init__(
                 "reduce_grads_use_alltoall": False,
             }
         )
+        self.tokenizer = tokenizer
 
         config = get_model_config(self.actor_module[0])
         print(config)
@@ -183,6 +186,9 @@ def compute_log_prob(self, data: DataProto, calculate_entropy=False) -> torch.Te
         Returns:
             DataProto: torch.Tensor: the log_prob tensor
         """
+        import torch._dynamo
+        torch._dynamo.config.suppress_errors = True
+
         data.to(get_device_id())
         data.batch = data.batch.contiguous()
         use_dynamic_bsz = data.meta_info.get("use_dynamic_bsz", False)
@@ -532,17 +538,13 @@ def logits_processor(logits, label, label_mask):
                     ret["log_probs"] = log_probs
                     return ret
 
-                logits_processor_args = {"label": label, "label_mask": label_mask}
-                output = forward_fn(
-                    model,
-                    input_ids,
-                    attention_mask,
-                    position_ids,
-                    sequence_parallel=self.tf_config.sequence_parallel,
-                    multi_modal_inputs=multi_modal_inputs,
-                    logits_processor=logits_processor,
-                    logits_processor_args=logits_processor_args,
-                )
+            logits_processor_args = {"label": label, "label_mask": label_mask}
+
+            from verl.models.mcore import get_mcore_forward_fn
+
+            forward_fn = get_mcore_forward_fn(self.hf_config)
+
+            output = forward_fn(model, input_ids, attention_mask, position_ids, sequence_parallel=self.tf_config.sequence_parallel, logits_processor=logits_processor, logits_processor_args=logits_processor_args)
 
             if forward_only:
                 meta_info = None
diff --git a/verl/workers/config/rollout.py b/verl/workers/config/rollout.py
index 2b17408035e..d4bfa18f766 100644
--- a/verl/workers/config/rollout.py
+++ b/verl/workers/config/rollout.py
@@ -47,6 +47,8 @@ class MultiTurnConfig(BaseConfig):
     max_assistant_turns: Optional[int] = None
     tool_config_path: Optional[str] = None
     max_user_turns: Optional[int] = None
+    max_turns: Optional[int] = None
+    enable_tokenization_sanity_check: bool = True
     max_parallel_calls: int = 1
     max_tool_response_length: int = 256
     tool_response_truncate_side: str = "middle"
@@ -102,7 +104,13 @@ class RolloutConfig(BaseConfig):
     cudagraph_capture_sizes: Optional[list] = None
     free_cache_engine: bool = True
     tensor_model_parallel_size: int = 2
+    pipeline_model_parallel_size: int = 1
     max_num_batched_tokens: int = 8192
+    
+    # add by async-rl
+    enable_dual_buffer: bool = False
+    param_update_preduce_bucket_size_mb: int = 512
+    param_update_consume_bucket_size_mb: int = 128
 
     # TODO: enable train_kwargs
     # train_sampling_config: SamplingConfig = field(default_factory=SamplingConfig)
diff --git a/verl/workers/megatron_workers.py b/verl/workers/megatron_workers.py
index 1ef828f3570..f32e88c0af2 100644
--- a/verl/workers/megatron_workers.py
+++ b/verl/workers/megatron_workers.py
@@ -16,8 +16,10 @@
 """
 
 import datetime
+import asyncio
 import logging
 import os
+import ray
 import time
 from typing import Any, Optional
 
@@ -36,7 +38,8 @@
 
 from verl import DataProto
 from verl.single_controller.base import Worker
-from verl.single_controller.base.decorator import Dispatch, make_nd_compute_dataproto_dispatch_fn, register
+from verl.single_controller.base.decorator import Dispatch, make_nd_compute_dataproto_dispatch_fn, Execute, register
+from verl.single_controller.base.megatron.worker import MegatronWorker
 from verl.utils import hf_tokenizer
 from verl.utils.checkpoint.megatron_checkpoint_manager import MegatronCheckpointManager
 from verl.utils.config import omega_conf_to_dataclass
@@ -45,8 +48,10 @@
 from verl.utils.fs import copy_to_local
 from verl.utils.megatron_utils import (
     load_megatron_model_to_gpu,
+    load_megatron_model_grad_to_gpu,
     load_megatron_optimizer,
     offload_megatron_model_to_cpu,
+    offload_megatron_model_grad_to_cpu,
     offload_megatron_optimizer,
 )
 from verl.utils.memory_utils import aggressive_empty_cache
@@ -246,8 +251,96 @@ def __init__(self, config: DictConfig, role: str, **kwargs):
         self._is_offload_grad = False
         self._is_offload_optimizer = False
 
+        def enable_megatron_sequence_parallel():  # True when this worker's actor or ref megatron config enables sequence_parallel
+            if self._is_actor and self.config.actor.megatron.sequence_parallel:
+                return True
+            if self._is_ref and self.config.ref.megatron.sequence_parallel:
+                return True
+            return False
+
+        def tp():  # tensor_model_parallel_size: actor config first, then ref, else 1
+            if self._is_actor:
+                return self.config.actor.megatron.tensor_model_parallel_size
+            if self._is_ref:
+                return self.config.ref.megatron.tensor_model_parallel_size
+            return 1
+        def pp():  # pipeline_model_parallel_size: actor config first, then ref, else 1
+            if self._is_actor:
+                return self.config.actor.megatron.pipeline_model_parallel_size
+            if self._is_ref:
+                return self.config.ref.megatron.pipeline_model_parallel_size
+            return 1
+        def vpp():  # virtual_pipeline_model_parallel_size; the ref value overrides the actor value when both roles are set, None otherwise
+            _vpp = None
+            if self._is_actor:
+                _vpp = self.config.actor.megatron.virtual_pipeline_model_parallel_size
+            if self._is_ref:
+                _vpp = self.config.ref.megatron.virtual_pipeline_model_parallel_size
+            if isinstance(_vpp, str):  # string config values are evaluated into Python objects
+                _vpp = eval(_vpp)  # NOTE(review): eval() runs whatever the config string holds -- confirm only trusted literals reach it
+            return _vpp
+        def cp():  # context_parallel_size: actor config first, then ref, else 1
+            if self._is_actor:
+                return self.config.actor.megatron.context_parallel_size
+            if self._is_ref:
+                return self.config.ref.megatron.context_parallel_size
+            return 1
+        def ep():  # expert_model_parallel_size: actor config first, then ref, else 1
+            if self._is_actor:
+                return self.config.actor.megatron.expert_model_parallel_size
+            if self._is_ref:
+                return self.config.ref.megatron.expert_model_parallel_size
+            return 1
+        def etp():  # expert_tensor_parallel_size; the ref value overrides the actor value when both roles are set, None otherwise
+            _etp = None
+            if self._is_actor:
+                _etp = self.config.actor.megatron.expert_tensor_parallel_size
+            if self._is_ref:
+                _etp = self.config.ref.megatron.expert_tensor_parallel_size
+            if isinstance(_etp, str):  # string config values are evaluated into Python objects
+                _etp = eval(_etp)  # NOTE(review): eval() runs whatever the config string holds -- confirm only trusted literals reach it
+            return _etp
+        
+        def seed():  # megatron seed: actor config first, then ref, else the fallback 42
+            if self._is_actor:
+                return self.config.actor.megatron.seed
+            if self._is_ref:
+                return self.config.ref.megatron.seed
+            return 42
+
+        self._is_sperated_arch = not (self._is_actor and self._is_rollout)
+
+        # NOTE(sgm): We utilize colocate WorkerGroup by default.
+        # As a result, Workers for different model share the same process.
+        # Therefore, we only require one distribute initialization.
+        # To utilize different parallel strategy in different models:
+        # 1, users should disable WorkerDict; 2.assign different ResourcePool to different models,
+        # 3. and apply the following patch in ray==2.10, https://github.com/ray-project/ray/pull/44385
+        if not torch.distributed.is_initialized():
+            rank = int(os.environ["LOCAL_RANK"])
+            from datetime import timedelta
+            torch.distributed.init_process_group(backend="nccl", timeout=timedelta(seconds=3600))
+
+            torch.cuda.set_device(rank)
+
+            if enable_megatron_sequence_parallel():
+                os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"
+            mpu.initialize_model_parallel(
+                tensor_model_parallel_size=tp(),
+                pipeline_model_parallel_size=pp(),
+                virtual_pipeline_model_parallel_size=vpp(),
+                pipeline_model_parallel_split_rank=None,
+                use_sharp=False,
+                context_parallel_size=cp(),
+                expert_model_parallel_size=ep(),
+                expert_tensor_parallel_size=etp(),
+                nccl_communicator_config_path=None,
+            )
+
+        set_random_seed(seed=seed())
+
         # normalize config
-        if self._is_actor and self._is_rollout:
+        if (self._is_actor and self._is_rollout) or (self._is_actor and self._is_sperated_arch):
             self.config.actor.ppo_mini_batch_size *= self.config.rollout.n
             self.config.actor.ppo_mini_batch_size //= mpu.get_data_parallel_world_size()
             if self.config.actor.get("ppo_micro_batch_size", None):
@@ -256,7 +349,8 @@ def __init__(self, config: DictConfig, role: str, **kwargs):
                 self.config.actor.ppo_micro_batch_size_per_gpu = self.config.actor.ppo_micro_batch_size
                 self.config.rollout.log_prob_micro_batch_size_per_gpu = self.config.rollout.log_prob_micro_batch_size
 
-            self._is_offload_param = self.config.actor.megatron.get("param_offload", False)
+            if not self._is_sperated_arch:
+                self._is_offload_param = self.config.actor.megatron.get("param_offload", False)
             self._is_offload_grad = self.config.actor.megatron.get("grad_offload", False)
             self._is_offload_optimizer = self.config.actor.megatron.get("optimizer_offload", False)
         elif self._is_ref:
@@ -270,17 +364,48 @@ def __init__(self, config: DictConfig, role: str, **kwargs):
                 )
             self._ref_is_offload_param = self.config.ref.megatron.get("param_offload", False)
 
+    @register(dispatch_mode=Dispatch.ONE_TO_ALL, execute_mode=Execute.ALL, blocking=True)
+    def setup_for_queue(self, queue):
+        """Store `queue` on the worker and forward it to the param update manager when one has been built."""
+        self._store_refs_queue = queue
+        if getattr(self, "param_update_manager", None) is not None:  # init_model may leave the attribute set to None
+            self.param_update_manager.setup_for_queue(queue)
+
+    @register(dispatch_mode=Dispatch.ONE_TO_ALL, execute_mode=Execute.ALL, blocking=True)
+    def check_for_ray_col(self, col_name="actor_rollout_sync"):
+        """Print whether the collective group `col_name` is initialized, with this worker's rank and size in it and its ray worker id."""
+        from ray.util.collective import get_rank, get_collective_group_size, is_group_initialized
+        ok = is_group_initialized(group_name=col_name)
+        rank = get_rank(group_name=col_name)
+        size = get_collective_group_size(group_name=col_name)
+        worker_id = ray.get_runtime_context().get_worker_id()
+        print(f"check_for_ray_col ok: {ok}, rank: {rank}, self.rank:{self.rank}, size: {size}, worker_id: {worker_id}")
+        
+    @register(dispatch_mode=Dispatch.ONE_TO_ALL, execute_mode=Execute.ALL, blocking=True)
+    def setup_for_ray_col(self, rank_offset, size, name, backend="nccl"):
+        """Pass this worker's offset rank and the derived engine layout to the param update manager; always returns True."""
+        tp_size = self.config.rollout.tensor_model_parallel_size
+        # TODO: support PP/EP
+        num_engines = (size - rank_offset) // tp_size
+        leads_engine = self.rank % tp_size == 0
+        self.param_update_manager.setup_for_ray_col(self.rank + rank_offset, size, rank_offset, num_engines, tp_size, leads_engine, name, backend)
+        return True
+
     def _build_model_optimizer(
         self, model_path, optim_config, override_model_config, override_transformer_config, override_ddp_config=None
     ):
+        from megatron.core.models.gpt.gpt_model import ModelType
+
+        from verl.utils.megatron.optimizer import get_megatron_optimizer
+        from verl.utils.megatron_utils import get_model
+        from verl.utils.model import get_generation_config, print_model_size
+
         from verl.utils.megatron.optimizer import (
             get_megatron_optimizer,
             get_megatron_optimizer_param_scheduler,
             init_megatron_optim_config,
         )
         from verl.utils.megatron_utils import McoreModuleWrapperConfig, make_megatron_module
-        from verl.utils.model import get_generation_config, print_model_size
-
         self._init_hf_config_and_tf_config(
             model_path,
             model_path,
@@ -288,10 +413,17 @@ def _build_model_optimizer(
             override_model_config,
             override_transformer_config,
             self.config.model.get("trust_remote_code", False),
-            self.config.actor.megatron.use_mbridge,
+            self.config.actor.megatron.use_mbridge if hasattr(self.config.actor.megatron, "use_mbridge") else False,
         )
         self.generation_config = get_generation_config(self.local_path)
 
+        def megatron_actor_model_provider(pre_process, post_process):
+            from verl.models.mcore import init_mcore_model
+
+            parallel_model = init_mcore_model(self.tf_config, self.hf_config, pre_process, post_process, share_embeddings_and_output_weights=self.share_embeddings_and_output_weights, value=False, freeze_moe_router=override_model_config.get("moe_config", {}).get("freeze_moe_router", False))
+            parallel_model.cuda()
+            return parallel_model
+ 
         if self._is_actor or self._is_rollout:
             wrap_config = McoreModuleWrapperConfig(
                 is_value_model=False,  # actor is not value model
@@ -322,6 +454,25 @@ def _build_model_optimizer(
                             self.config, self.hf_config, actor_module, params_dtype=self.dtype, is_value_model=False
                         )
 
+            if self.rank == 0:
+                print_model_size(actor_module[0])
+            log_gpu_memory_usage("After MegatronPPOActor init", logger=logger)
+        elif self._is_actor:
+            from verl.utils.megatron_utils import start_record_memory_history, stop_record_memory_history
+            # start_record_memory_history()
+            actor_module = get_model(
+                megatron_actor_model_provider,
+                model_type=ModelType.encoder_or_decoder,
+                wrap_with_ddp=True,
+                use_distributed_optimizer=self.config.actor.megatron.use_distributed_optimizer,
+            )
+            # stop_record_memory_history()
+            if self.config.actor.load_weight:
+                if self.config.actor.megatron.use_dist_checkpointing:
+                    load_mcore_dist_weights(actor_module, self.config.actor.megatron.dist_checkpointing_path, is_value_model=False)
+                else:
+                    load_megatron_gptmodel_weights(self.config, self.hf_config, actor_module, params_dtype=self.dtype, is_value_model=False)
+
             if self.rank == 0:
                 print_model_size(actor_module[0])
             log_gpu_memory_usage("After MegatronPPOActor init", logger=logger)
@@ -356,6 +507,9 @@ def _build_model_optimizer(
                         )
             log_gpu_memory_usage("After ref module init", logger=logger)
             return ref_module, self.hf_config
+        else:
+            # rollout only
+            actor_module = None
 
         # TODO: add more optimizer args into config
         if self._is_actor:
@@ -373,9 +527,51 @@ def _build_model_optimizer(
 
         return actor_module, actor_optimizer, actor_optimizer_scheduler, self.hf_config, optim_config
 
+    def _build_param_update_manager(self):  # builds a ParamUpdateManager for actor/rollout workers; returns None for other roles
+        from verl.workers.param_update.param_update import ParamUpdateManager
+        if self._is_actor or self._is_rollout:
+            # we only need param update manager for actor and rollout
+            # NOTE(review): "gate_proj_layer_name" is "linear_fc1.weight" here but "linear_fc1." in _build_rollout -- confirm which is intended.
+            # TODO: copy from _build_rollout
+            layer_name_mapping = {
+                "qkv_layer_name": "self_attention.linear_qkv.",
+                "gate_proj_layer_name": "linear_fc1.weight",
+            }
+
+            from verl.models.mcore import get_mcore_weight_converter
+
+            weight_converter = get_mcore_weight_converter(self.actor_model_config, self.dtype)
+            # A worker without the actor role passes None as the model parameters.
+            actor_module=self.actor.actor_module if self._is_actor else None
+            # NOTE(review): the bucket size is read from config.actor with a 512 fallback -- confirm that is the section where it is configured.
+            param_update_manager = ParamUpdateManager(
+                model_params=actor_module,
+                model_config=self.actor_model_config,
+                weight_converter=weight_converter,
+                transformer_config=self.tf_config,
+                layer_name_mapping=layer_name_mapping,
+                convert_qkv_gate_up_by_simple_split=True,
+                param_update_preduce_bucket_size_mb=self.config.actor.get("param_update_preduce_bucket_size_mb", 512),
+            )
+            
+            # Register actor clusters (if configured)
+            if hasattr(self.config, 'actor_clusters'):
+                train_ranks = self.config.actor_clusters.get('train_ranks', [])
+                generate_ranks = self.config.actor_clusters.get('generate_ranks', [])
+                world_size = self.config.actor_clusters.get('world_size', self.world_size)
+                # Clusters are registered only when both rank lists are non-empty.
+                if train_ranks and generate_ranks:
+                    param_update_manager.register_actor_clusters(train_ranks, generate_ranks, world_size)
+                    print(f"Registered actor clusters: train_ranks={train_ranks}, generate_ranks={generate_ranks}, world_size={world_size}")
+        else:
+            # ref does not need param update manager
+            param_update_manager = None
+        return param_update_manager
+        
+
     def _build_rollout(self, trust_remote_code=False):
         from torch.distributed.device_mesh import init_device_mesh
-
+        
         layer_name_mapping = {
             "qkv_layer_name": "self_attention.linear_qkv.",
             "gate_proj_layer_name": "linear_fc1.",
@@ -449,7 +645,7 @@ def _build_rollout(self, trust_remote_code=False):
             # For this reason, sharding_manager.__init__ should not import FSDPSGLangShardingManager and we import it
             # here use the abs path.
             # check: https://github.com/sgl-project/sglang/blob/00f42707eaddfc2c0528e5b1e0094025c640b7a0/python/sglang/srt/layers/quantization/fp8_utils.py#L76
-            from verl.workers.sharding_manager.megatron_sglang import MegatronSGLangShardingManager
+            from verl.workers.sharding_manager.megatron_sglang import MegatronSGLangShardingManager, MegatronSGLangAsyncShardingManager
 
             infer_tp = self.config.rollout.tensor_model_parallel_size
             dp = self.world_size // infer_tp
@@ -474,14 +670,16 @@ def _build_rollout(self, trust_remote_code=False):
                 model_hf_config=self.actor_model_config,
                 trust_remote_code=trust_remote_code,
                 device_mesh=rollout_device_mesh,
+                param_update_manager=self.param_update_manager,
             )
             log_gpu_memory_usage(f"After building {self.config.rollout.name} rollout", logger=None)
 
             from verl.models.mcore import get_mcore_weight_converter
 
             weight_converter = get_mcore_weight_converter(self.actor_model_config, self.dtype)
-            sharding_manager = MegatronSGLangShardingManager(
-                actor_module=self.actor.actor_module,
+            sharding_manager_cls = MegatronSGLangAsyncShardingManager if self._is_sperated_arch else MegatronSGLangShardingManager
+            sharding_manager = sharding_manager_cls(
+                actor_module=self.actor.actor_module if self._is_actor else None,
                 inference_engine=rollout._engine,
                 model_config=self.actor_model_config,
                 rollout_config=self.config.rollout,
@@ -495,9 +693,9 @@ def _build_rollout(self, trust_remote_code=False):
             log_gpu_memory_usage("After building sharding manager", logger=logger)
         else:
             raise NotImplementedError("Only vllmRollout is supported with Megatron now")
-        print(f"rollout and sharding manager init done sharding_manager: {sharding_manager}")
         return rollout, sharding_manager
 
+
     @register(dispatch_mode=Dispatch.ONE_TO_ALL)
     def init_model(self):
         if self.config.model.get("external_lib", None) is not None:
@@ -522,6 +720,8 @@ def init_model(self):
             )
         else:
             override_transformer_config = {}
+            override_ddp_config = {}
+
         self.param_dtype = torch.bfloat16
         log_gpu_memory_usage("Before init actor model and optimizer", logger=logger)
         self.dtype = PrecisionType.to_dtype(self.param_dtype)
@@ -544,6 +744,9 @@ def init_model(self):
             if self._is_offload_param:
                 offload_megatron_model_to_cpu(self.actor_module)
                 log_gpu_memory_usage("After offload actor params and grad during init", logger=logger)
+            elif self._is_offload_grad:
+                offload_megatron_model_grad_to_cpu(self.actor_module)
+                log_gpu_memory_usage("After offload actor params grad during init", logger=logger)
             if self._is_offload_optimizer:
                 offload_megatron_optimizer(self.actor_optimizer)
                 log_gpu_memory_usage("After offload actor optimizer during init", logger=logger)
@@ -557,9 +760,16 @@ def init_model(self):
                 tf_config=self.tf_config,
                 actor_module=self.actor_module,
                 actor_optimizer=self.actor_optimizer,
+                tokenizer=self.tokenizer,
             )
             log_gpu_memory_usage("After MegatronPPOActor init", logger=logger)
 
+        self.param_update_manager = None
+        if self._is_sperated_arch and (self._is_actor or self._is_rollout):
+            # we need the param update manager for actor and rollout
+            self.param_update_manager = self._build_param_update_manager()
+            log_gpu_memory_usage("After param update manager init", logger=logger)
+
         if self._is_rollout:
             self.rollout, self.sharding_manager = self._build_rollout(
                 trust_remote_code=self.config.model.get("trust_remote_code", False)
@@ -583,6 +793,7 @@ def init_model(self):
                 tf_config=self.tf_config,
                 actor_module=self.ref_module,
                 actor_optimizer=None,
+                tokenizer=self.tokenizer,
             )
             if self._ref_is_offload_param:
                 offload_megatron_model_to_cpu(self.ref_module)
@@ -591,6 +802,7 @@ def init_model(self):
         if self._is_actor:
             self.flops_counter = FlopsCounter(self.actor_model_config)
             self.checkpoint_mananager = MegatronCheckpointManager(
+                tf_config=self.tf_config,
                 config=self.config,
                 checkpoint_config=self.config.actor.checkpoint,
                 model_config=self.actor_model_config,
@@ -617,18 +829,28 @@ def init_model(self):
     @DistProfiler.annotate(color="red")
     def update_actor(self, data: DataProto):
         assert self._is_actor
+        import time
         if self._is_offload_param:
+            log_gpu_memory_usage("Before load actor params and grad during update_actor", logger=logger)
             load_megatron_model_to_gpu(self.actor_module)
             log_gpu_memory_usage("After load actor params and grad during update_actor", logger=logger)
+        elif self._is_offload_grad:
+            log_gpu_memory_usage("Before load actor params during update_actor", logger=logger)
+            load_megatron_model_grad_to_gpu(self.actor_module)
+            log_gpu_memory_usage("After load actor params during update_actor", logger=logger)
         if self._is_offload_optimizer:
+            log_gpu_memory_usage("Before load actor optimizer during update_actor", logger=logger)
             load_megatron_optimizer(self.actor_optimizer)
             log_gpu_memory_usage("After load actor optimizer during update_actor", logger=logger)
 
         micro_batch_size = self.config.actor.ppo_micro_batch_size_per_gpu
         data.meta_info["micro_batch_size"] = micro_batch_size
+
         dataloader = self.actor.make_minibatch_iterator(data=data)
+
         with Timer(name="update_policy", logger=None) as timer:
             metrics = self.actor.update_policy(dataloader=dataloader)
+
         delta_time = timer.last
         global_num_tokens = data.meta_info["global_token_num"]
         estimated_flops, promised_flops = self.flops_counter.estimate_flops(global_num_tokens, delta_time)
@@ -648,6 +870,9 @@ def update_actor(self, data: DataProto):
         if self._is_offload_param:
             offload_megatron_model_to_cpu(self.actor_module)
             log_gpu_memory_usage("After offload actor params and grad during update_actor", logger=logger)
+        elif self._is_offload_grad:
+            offload_megatron_model_grad_to_cpu(self.actor_module)
+            log_gpu_memory_usage("After offload actor params during update_actor", logger=logger)
         if self._is_offload_optimizer:
             offload_megatron_optimizer(self.actor_optimizer)
             log_gpu_memory_usage("After offload actor optimizer during update_actor", logger=logger)
@@ -655,6 +880,63 @@ def update_actor(self, data: DataProto):
         aggressive_empty_cache(force_sync=True)
         return output
 
+    
+    @register(dispatch_mode=Dispatch.ONE_TO_ALL, execute_mode=Execute.ALL, blocking=False)
+    @GPUMemoryLogger(role="sync_per_tensor_generator", logger=logger)
+    def sync_per_tensor_generator(self):
+        """Synchronize model parameters between the actor and the rollout.
+
+        Actor workers pass no callback to the param update manager; any other
+        worker passes the rollout's update-weight function.
+        """
+        assert self._is_rollout or self._is_actor
+        # Default to "no callback"; only non-actor workers look one up.
+        weight_update_fn = None
+        if not self._is_actor:
+            weight_update_fn = self.rollout.get_update_weight_func()
+        self.param_update_manager.sync_per_tensor_generator(weight_update_fn)
+
+    @register(dispatch_mode=Dispatch.ONE_TO_ALL, execute_mode=Execute.ALL, blocking=False)
+    @GPUMemoryLogger(role="async_param_update", logger=logger)
+    def async_param_update(self):
+        """Start an async param update; a no-op when the manager has no such method."""
+        if not hasattr(self.param_update_manager, "async_param_update"):
+            return
+        assert self._is_rollout or self._is_actor
+        # Actors pass no callback; other workers use the rollout's updater.
+        weight_update_fn = None
+        if not self._is_actor:
+            weight_update_fn = self.rollout.get_update_weight_func()
+        self.param_update_manager.async_param_update(weight_update_fn)
+
+    @register(dispatch_mode=Dispatch.ONE_TO_ALL, execute_mode=Execute.ALL, blocking=False)
+    @GPUMemoryLogger(role="wait_for_send_complete", logger=logger)
+    def wait_for_send_complete(self):
+        """Call the param update manager's wait_for_send_complete() when it defines one."""
+        getattr(self.param_update_manager, "wait_for_send_complete", lambda: None)()
+
+    @register(dispatch_mode=Dispatch.ALL_TO_ALL)
+    @GPUMemoryLogger(role="get_params_meta", logger=logger)
+    def get_params_meta(self):
+        """Return the parameter metadata held by the param update manager.
+
+        Only valid on workers that act as actor and/or rollout.
+        """
+        assert self._is_rollout or self._is_actor
+        # Delegate straight to the manager; the worker keeps no copy.
+        return self.param_update_manager.get_params_meta()
+        
+    @register(dispatch_mode=Dispatch.ALL_TO_ALL)
+    @GPUMemoryLogger(role="set_params_meta", logger=logger)
+    def set_params_meta(self, params_meta):
+        """Store parameter metadata on the manager and, on rollout workers, on the rollout."""
+        assert self._is_rollout or self._is_actor
+        self.param_update_manager.set_params_meta(params_meta)
+        # init_model assigns self.rollout only under _is_rollout, so guard the
+        # call to avoid an AttributeError on actor-only workers.
+        if self._is_rollout:
+            self.rollout.set_params_meta(params_meta)
+
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="rollout"))
     @GPUMemoryLogger(role="generate_sequences", logger=logger)
     @DistProfiler.annotate(color="red")
@@ -696,10 +978,64 @@ def generate_sequences(self, prompts: DataProto):
         )
         output.meta_info["timing"] = timing_generate
         output = output.to("cpu")
+
+        # clear kv cache
+        aggressive_empty_cache(force_sync=True)
+        return output
+
+    # NOTE(review): dispatched on mesh_name="actor" although the body asserts _is_rollout — confirm the mesh.
+    @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="actor"))
+    @GPUMemoryLogger(role="generate_sequences_sperated", logger=logger)
+    def generate_sequences_sperated(self, prompts: DataProto):
+        """Generate sequences for ``prompts`` on a rollout worker and return the output on CPU."""
+        assert self._is_rollout
+        prompts.batch = prompts.batch.cuda()
+        meta_info = {
+            "eos_token_id": self.generation_config.eos_token_id if self.generation_config is not None else self.tokenizer.eos_token_id,
+            "pad_token_id": self.generation_config.pad_token_id if self.generation_config is not None else self.tokenizer.pad_token_id,
+        }
+        prompts.meta_info.update(meta_info)
+        with self.sharding_manager:
+            log_gpu_memory_usage("After entering sharding manager", logger=logger)
+
+            # (zhangchi.usc1992) wake up kv cache here. Currently only support vllm.
+            # Will support sglang once separate wakeup of model weights and kv cache is supported
+            # This API should be exposed by the rollout. Will rewrite this part when we refactor after v0.4 release.
+            # Currently, we hack here to support running large models (QWen3-236b and DeepSeek-671b)
+            if self.config.rollout.name == "vllm":
+                import inspect
+                if "tags" in inspect.signature(self.rollout.inference_engine.wake_up).parameters:
+                    self.rollout.inference_engine.wake_up(tags=["kv_cache"])
+
+            prompts = self.sharding_manager.preprocess_data(prompts)
+            output = self.rollout.generate_sequences(prompts=prompts)
+            output = self.sharding_manager.postprocess_data(output)
+
+        output = output.to("cpu")
+
         # clear kv cache
         aggressive_empty_cache(force_sync=True)
         return output
 
+    @register(dispatch_mode=Dispatch.ALL_TO_ALL, blocking=False)
+    @GPUMemoryLogger(role="async_generate_sequences", logger=logger)
+    async def async_generate_sequences(self, input_queue, output_queue):
+        """
+        Async wrapper around generate_sequences, used for testing.
+
+        Takes one batch of prompts from ``input_queue``, runs
+        ``self.generate_sequences`` on it and puts the result on
+        ``output_queue``.
+
+        NOTE(review): ``get``/``put``/``generate_sequences`` are called
+        synchronously here, so nothing is awaited inside this coroutine;
+        confirm that blocking the event loop is acceptable for the caller.
+        """
+        # One item in, one item out: the queue is not drained in a loop.
+        batch = input_queue.get()
+        result = self.generate_sequences(batch)
+        output_queue.put(result)
+
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="actor"))
     @GPUMemoryLogger(role="compute_ref_log_prob", logger=logger)
     @DistProfiler.annotate(color="olive")
@@ -708,6 +1044,7 @@ def compute_ref_log_prob(self, data: DataProto):
         if self._ref_is_offload_param:
             load_megatron_model_to_gpu(self.ref_module, load_grad=False)
             log_gpu_memory_usage("After load ref params and grad during compute_ref_log_prob", logger=logger)
+
         micro_batch_size = self.config.ref.log_prob_micro_batch_size_per_gpu
         data.meta_info["micro_batch_size"] = micro_batch_size
         data.meta_info["max_token_len"] = self.config.ref.log_prob_max_token_len_per_gpu
@@ -927,7 +1264,7 @@ def _build_critic_model_optimizer(
             override_model_config,
             override_transformer_config,
             self.config.model.get("trust_remote_code", False),
-            self.config.megatron.use_mbridge,
+            self.config.megatron.use_mbridge if hasattr(self.config.megatron, "use_mbridge") else False,
         )
 
         wrap_config = McoreModuleWrapperConfig(
@@ -1026,6 +1363,7 @@ def init_model(self):
         )
         self.flops_counter = FlopsCounter(self.critic_model_config)
         self.checkpoint_mananager = MegatronCheckpointManager(
+            tf_config=self.tf_config,
             config=self.config,
             checkpoint_config=self.config.checkpoint,
             model_config=self.critic_model_config,
@@ -1192,7 +1530,7 @@ def _build_rm_model(self, model_path, tokenizer, override_model_config, override
             override_model_config,
             override_transformer_config,
             self.config.model.get("trust_remote_code", False),
-            self.config.megatron.use_mbridge,
+            self.config.megatron.use_mbridge if hasattr(self.config.megatron, "use_mbridge") else False,
         )
 
         wrap_config = McoreModuleWrapperConfig(
diff --git a/verl/workers/param_update/__init__.py b/verl/workers/param_update/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/verl/workers/param_update/param_update.py b/verl/workers/param_update/param_update.py
new file mode 100644
index 00000000000..ff877e64cdf
--- /dev/null
+++ b/verl/workers/param_update/param_update.py
@@ -0,0 +1,750 @@
+import asyncio
+import time
+import os
+import torch
+import ray
+import threading
+import queue
+from enum import Enum, auto
+from typing import Optional, Any, Dict, List
+from verl.workers.param_update.ray_async_communication import (
+    cross_process_ray_put,
+    cross_process_ray_get,
+)
+
+from verl.trainer.ppo.pipeline.pipeline_utils import enhanced_print
+from verl.utils.megatron_utils import per_tensor_generator, per_tensor_generator_bucketed
+
+
+# ============================================================================
+# ParamUpdateManager class
+# ============================================================================
+class ParamUpdateManager:
+    def __init__(
+        self,
+        model_params,
+        model_config,
+        weight_converter,
+        transformer_config,
+        layer_name_mapping,
+        convert_qkv_gate_up_by_simple_split,  # NOTE(review): accepted but never stored or read in __init__
+        enable_async_rl: bool = True,  # when true, _init_async_rl_components() runs during construction
+        target_device: str = "cpu",  # device passed to the parameter generator by get_params_meta()
+        enable_param_sync: bool = True,  # True: Ray put/get path; False: Ray Collective groups are created
+        store_refs_queue = None,  # queue handle; can be replaced later via setup_for_queue()
+        param_update_preduce_bucket_size_mb: int = 512,  # stored as send_bucket_size_mb
+    ):
+        """Store the model handles and sync settings, and read debug switches from the environment."""
+        self.model_params = model_params
+        self.model_config = model_config
+        self.weight_converter = weight_converter
+        self.transformer_config = transformer_config
+        self.layer_name_mapping = layer_name_mapping
+        self.target_device = target_device
+        self.enable_param_sync = enable_param_sync
+        self.store_refs_queue = store_refs_queue
+
+        # Async RL optimization switch
+        self.enable_async_rl = enable_async_rl
+
+        # Verbose logging is opt-in via PARAM_UPDATE_VERBOSE_LOG=true (case-insensitive)
+        self.verbose_logging = os.environ.get('PARAM_UPDATE_VERBOSE_LOG', 'false').lower() == 'true'
+
+        # Debug switch: SKIP_FUNC_CALL=1 requests that func_call execution be skipped
+        self.skip_func_call = os.environ.get('SKIP_FUNC_CALL', '0') == '1'
+
+        # Bucket size in MB, used both for the bucketed parameter generator
+        # and as the group size limit in _group_tensors_by_metas()
+        self.send_bucket_size_mb = param_update_preduce_bucket_size_mb
+
+        enhanced_print("ParamUpdateManager", None, f"Bucket sizes: send={self.send_bucket_size_mb}MB")
+
+        # Parameter metadata cache; filled by get_params_meta() or set_params_meta()
+        self._params_meta = []
+
+        # Default Ray collective name; setup_for_ray_col() overwrites it
+        self.ray_col_name = "actor_rollout_sync"  # Dedicated parameter sync group name, avoid conflict with SGLang communication
+
+        # Initialize async RL optimization components
+        if self.enable_async_rl:
+            self._init_async_rl_components()
+
+        self._debug_mode = False  # Debug mode switch
+
+        if self.skip_func_call:
+            enhanced_print("param_update", None, "⚠️ SKIP_FUNC_CALL enabled - func_call execution will be skipped for debugging")
+    
+    def setup_for_queue(self, queue):  # NOTE: the parameter name shadows the imported `queue` module inside this method
+        self.store_refs_queue = queue  # replaces the queue handle given to __init__
+
+    def _init_async_rl_components(self):
+        """Initialize async RL state; called from __init__ when enable_async_rl is true."""
+        # Version counter, reported by get_async_rl_stats(); starts at -1
+        self.current_version = -1
+
+        enhanced_print("ParamUpdateManager", None, "Initialized async RL components")
+
+    def get_async_rl_stats(self) -> Dict[str, Any]:
+        """Report the async RL status.
+
+        The version counter is included only when async RL is enabled.
+        """
+        stats: Dict[str, Any] = {"enabled": False}
+        if self.enable_async_rl:
+            stats = {"enabled": True, "current_version": self.current_version}
+        return stats
+    
+    def set_model_params(self, model_params):
+        """Replace the stored model params; is_train_node() is true whenever they are not None."""
+        self.model_params = model_params
+        enhanced_print("param_update", None, f"Model params set: {type(model_params)}")  # logs only the type, not the contents
+    
+    def is_train_node(self) -> bool:
+        """Return True when this manager holds model params, which marks a training node."""
+        return self.model_params is not None
+    
+    def is_train_master_node(self):
+        """Return True on a training node whose rank is 0 (rank is assigned in setup_for_ray_col)."""
+        return self.is_train_node() and self.rank == 0
+
+    def is_generation_node(self) -> bool:
+        """Return True when no model params are held, which marks a generation node."""
+        return self.model_params is None
+
+    def is_generation_master_node(self):
+        """Return whether this is a generation node flagged as engine master (flag set in setup_for_ray_col)."""
+        return self.is_generation_node() and self.is_engine_master
+    
+    def engine_idx(self):
+        """Index of this rank's engine for the global queue: (rank - rank_offset) // engine_tp_size."""
+        return (self.rank - self.rank_offset) // self.engine_tp_size
+    
+    def one_to_all_nums(self):
+        """Number of engines (engine_nums from setup_for_ray_col), for the global queue."""
+        return self.engine_nums
+    
+    def setup_for_ray_col(self, rank: int, size: int, rank_offset: int, engine_nums: int, engine_tp_size: int, is_engine_master: bool, name: str, backend: str = "nccl"):
+        """Record this rank's topology and prepare the parameter-sync channel.
+
+        With ``enable_param_sync`` set, only a readiness message is logged
+        (Ray put/get path). Otherwise this rank joins the Ray collective
+        group ``name`` using ``backend``.
+
+        Returns:
+            True on success, False if any step raised (the error is logged).
+        """
+        try:
+            # Bookkeeping read by engine_idx(), one_to_all_nums() and the master-node checks.
+            self.ray_col_name, self.rank, self.size = name, rank, size
+            self.rank_offset, self.engine_nums = rank_offset, engine_nums
+            self.is_engine_master, self.engine_tp_size = is_engine_master, engine_tp_size
+
+            if not self.enable_param_sync:
+                from ray.util.collective import init_collective_group
+                init_collective_group(
+                    group_name=name,
+                    world_size=size,
+                    rank=rank,
+                    backend=backend,
+                )
+                enhanced_print("param_update", None, f"Ray Collective communication ready: rank={rank}, size={size}, name={name}, backend={backend}")
+            else:
+                enhanced_print("param_update", None, f"Ray put/get communication ready: rank={rank}, size={size}, name={name}")
+        except Exception as e:
+            enhanced_print("param_update", None, f"Failed to setup Ray communication: {e}")
+            return False
+        return True
+    
+    def register_actor_clusters(self, train_ranks: List[int], generate_ranks: List[int], world_size: int):
+        """Remember the train ranks, generate ranks and world size.
+
+        The stored values are reported back by get_communication_info().
+        """
+        self.train_ranks, self.generate_ranks, self.world_size = train_ranks, generate_ranks, world_size
+        enhanced_print("param_update", None, f"Registered actor clusters: train_ranks={train_ranks}, generate_ranks={generate_ranks}")
+    
+    def get_communication_info(self) -> Dict[str, Any]:
+        """Summarise the sync mode and the registered clusters (defaults if unregistered)."""
+        # Cluster attributes only exist after register_actor_clusters() has run.
+        fallbacks = (("train_ranks", []), ("generate_ranks", []), ("world_size", 0))
+        info: Dict[str, Any] = {"sync_mode": not self.enable_param_sync}
+        for attr_name, fallback in fallbacks:
+            info[attr_name] = getattr(self, attr_name, fallback)
+        return info
+    
+    def setup_logp_ref_logp_sync(self, logp_rank, ref_logp_rank, size):
+        """Create the NCCL collective groups for the logp and ref_logp workers.
+
+        Nothing is created when ``enable_param_sync`` is true; that path only logs.
+        """
+        if self.enable_param_sync:
+            enhanced_print("param_update", None, "Async mode: no additional communication groups needed for logp/ref_logp")
+            return
+
+        from ray.util.collective import init_collective_group
+
+        logp_group_name = "logp_ref_logp_sync"
+        init_collective_group(
+            group_name=logp_group_name,
+            world_size=size,
+            rank=logp_rank,
+            backend="nccl",
+        )
+        self.logp_ray_col_name = logp_group_name
+
+        ref_logp_group_name = "ref_logp_sync"
+        init_collective_group(
+            group_name=ref_logp_group_name,
+            world_size=size,
+            rank=ref_logp_rank,
+            backend="nccl",
+        )
+        self.ref_logp_ray_col_name = ref_logp_group_name
+
+        enhanced_print("param_update", None, f"Setup logp/ref_logp sync: logp_group={logp_group_name}, ref_logp_group={ref_logp_group_name}")
+    
+    def get_params_iter(self, target_device="cpu", use_bucketed=False):
+        """Return an iterator over the current parameters, or None without model params.
+
+        Args:
+            target_device: device forwarded to the underlying generator.
+            use_bucketed: use the bucketed generator (bucket size
+                ``send_bucket_size_mb``) instead of the plain per-tensor one.
+        """
+        if self.model_params is None:
+            # Only train nodes hold model params.
+            enhanced_print("param_update", None, f"get_params_iter: model_params is None, this is not a train node, returning None")
+            return None
+
+        bucket_mb = self.send_bucket_size_mb
+        if use_bucketed:
+            enhanced_print("param_update", None, f"Using bucketed per_tensor_generator with bucket size {bucket_mb}MB")
+            return self._get_params_iter_bucketed(target_device, bucket_mb)
+
+        enhanced_print("param_update", None, "Using original per_tensor_generator")
+        return per_tensor_generator(
+            self.model_params,
+            self.model_config,
+            self.weight_converter,
+            self.transformer_config,
+            self.layer_name_mapping,
+            target_device=target_device,
+        )
+        
+
+    def _get_params_iter_bucketed(self, target_device, bucket_size_mb):
+        """Build the bucketed per-tensor generator for the stored model params.
+
+        The generator from ``per_tensor_generator_bucketed`` is handed back
+        untouched: no prefetching or materialisation happens here.
+        """
+        return per_tensor_generator_bucketed(
+            self.model_params,
+            self.model_config,
+            self.weight_converter,
+            self.transformer_config,
+            self.layer_name_mapping,
+            target_device=target_device,
+            bucket_size_mb=bucket_size_mb,
+        )
+    
+    def _get_params_iter_bucketed_enhanced(self, target_device, bucket_size_mb):
+        """Return ``(global_tensor_map, groups, group_tensor_count)`` for bucketed sync.
+
+        Args:
+            target_device: device every collected tensor is moved to.
+            bucket_size_mb: bucket size handed to the bucketed generator.
+
+        ``global_tensor_map`` maps parameter names to tensors moved to
+        ``target_device``; it stays empty when this manager has no model params.
+        ``({}, [], 0)`` is returned when no metadata or no groups are available.
+        """
+        # Lazily populate the metadata that the grouping depends on.
+        if not self._params_meta:
+            self.get_params_meta()
+
+        if not self._params_meta:
+            enhanced_print("param_update", None, "Warning: params_meta is empty, returning empty results")
+            return {}, [], 0
+
+        groups, group_tensor_count = self._group_tensors_by_metas()
+        if not groups:
+            enhanced_print("param_update", None, "Warning: groups is empty, returning empty results")
+            return {}, [], 0
+
+        global_tensor_map = {}
+        if self.model_params is None:
+            # No weights on this side: only the grouping info is returned.
+            return global_tensor_map, groups, group_tensor_count
+
+        # Drain the bucketed generator into a name -> tensor map, skipping None tensors.
+        global_tensor_map.update(
+            (name, tensor.to(target_device))
+            for name, tensor in self._get_params_iter_bucketed(target_device, bucket_size_mb)
+            if tensor is not None
+        )
+
+        enhanced_print("param_update", None, f"Created global tensor map with {len(global_tensor_map)} tensors")
+        return global_tensor_map, groups, group_tensor_count
+    
+
+    def clear_gpu_cache(self):
+        """Call torch.cuda.empty_cache() when CUDA is available; otherwise do nothing."""
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            enhanced_print("param_update", None, "Cleared GPU cache")
+    
+    def clear_all_cache(self):
+        """Clear the bucket cache first, then the GPU cache."""
+        self.clear_bucketed_cache()  # NOTE(review): not defined in the part of the class reviewed here — confirm it exists
+        self.clear_gpu_cache()
+        enhanced_print("param_update", None, "Cleared all caches")
+    
+    def get_bucketed_cache_info(self):
+        """Summarise the bucket cache as per-key bucket and tensor counts."""
+        # A manager that never created ``_bucketed_cache`` reports an empty dict.
+        cache = getattr(self, '_bucketed_cache', {})
+        return {
+            key: {
+                'bucket_count': len(buckets),
+                'total_tensors': sum(len(bucket) for bucket in buckets),
+            }
+            for key, buckets in cache.items()
+        }
+
+    def get_params_meta(self):
+        """Return per-tensor metadata for the model parameters, computing it once.
+
+        Each entry is a dict with ``name``, ``shape``, ``dtype`` and ``size``
+        (bytes: ``numel() * element_size()``). A ``None`` tensor yields a
+        placeholder entry with an empty shape, float32 dtype and size 0.
+
+        Returns:
+            The cached list when it already has entries, ``[]`` when this
+            manager has no model params, otherwise the freshly built list.
+        """
+        # Serve the cache when populated; also tolerates a missing/None attribute.
+        cached = getattr(self, '_params_meta', None)
+        if cached:
+            return cached
+
+        if self.model_params is None:
+            enhanced_print("param_update", None, "Warning: model_params is None, cannot get params meta")
+            return []
+
+        per_tensor_param = self._get_params_iter_bucketed(self.target_device, self.send_bucket_size_mb)
+        if per_tensor_param is None:
+            enhanced_print("param_update", None, "Error: per_tensor_param returned None")
+            return []
+
+        # Build into a local list so that an exception raised while iterating the
+        # generator cannot leave a half-filled cache that later calls would return.
+        params_meta = []
+        for key, tensor in per_tensor_param:
+            if tensor is None:
+                meta = {"name": key, "shape": (), "dtype": torch.float32, "size": 0}
+            else:
+                meta = {
+                    "name": key,
+                    "shape": tensor.shape,
+                    "dtype": tensor.dtype,
+                    "size": tensor.numel() * tensor.element_size(),
+                }
+            params_meta.append(meta)
+        self._params_meta = params_meta
+        enhanced_print("param_update", None, f"Generated {len(self._params_meta)} parameter metadata entries using bucketed generator")
+        return self._params_meta
+
+    def set_params_meta(self, params_meta):
+        """Replace the cached metadata; a non-empty value stops get_params_meta() from recomputing."""
+        self._params_meta = params_meta
+
+    def preduce_per_tensor_generator(self, convert_generator_to_list=True):
+        """Return the bucketed CPU parameters as a list (or the raw generator).
+
+        Returns None when get_params_iter yields None (no model params here).
+        """
+        per_tensor_param = self.get_params_iter("cpu", use_bucketed=True)
+        # None (not a train node) used to crash the list conversion below.
+        if per_tensor_param is None or not convert_generator_to_list:
+            return per_tensor_param
+
+        param_list = list(per_tensor_param)
+        enhanced_print("param_update", None, f"Total tensors in per_tensor_generator: {len(param_list)}")
+        return param_list
+
+    def consume_per_tensor_generator(self, per_tensor_param, func_call):
+        """Run ``func_call(per_tensor_param)`` to completion on an event loop.
+
+        ``func_call`` must return an awaitable, which is driven with
+        ``run_until_complete``. A ``None`` ``per_tensor_param`` is skipped with a
+        warning. StopIteration is logged and swallowed; any other exception is
+        logged with its traceback and re-raised.
+        """
+        if per_tensor_param is None:
+            enhanced_print("param_update", None, "Warning: per_tensor_param is None, skipping consumption")
+            return
+
+        # Reuse the current event loop, or create and install one if there is none.
+        try:
+            event_loop = asyncio.get_event_loop()
+        except RuntimeError:
+            event_loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(event_loop)
+
+        try:
+            event_loop.run_until_complete(func_call(per_tensor_param))
+        except StopIteration:
+            enhanced_print("param_update", None, "Warning: StopIteration in consume_per_tensor_generator, generator may be empty")
+        except Exception as e:
+            enhanced_print("param_update", None, f"Error in consume_per_tensor_generator: {e}")
+            import traceback
+            traceback.print_exc()
+            raise e
+
+    def sync_per_tensor_generator(self, func_call=None):
+        """Forward func_call to async_param_update() and return its result."""
+        return self.async_param_update(func_call)
+
+    def _group_tensors_by_metas(self):
+        """Pack the parameter metadata into size-bounded groups, preserving order.
+
+        Metas join the current group until adding the next one would exceed
+        ``send_bucket_size_mb`` megabytes; a single oversized tensor still gets
+        a group of its own. ``None`` entries are skipped with a warning.
+
+        Returns:
+            ``(groups, group_tensor_count)``: ``groups`` is a list of lists of
+            meta dicts and ``group_tensor_count`` is how many metas were placed.
+            ``([], 0)`` is returned when no metadata is available.
+        """
+        # Lazily compute the metadata on first use.
+        if not self._params_meta:
+            self.get_params_meta()
+
+        if not self._params_meta:
+            enhanced_print("param_update", None, "Warning: params_meta is empty after get_params_meta()")
+            # Callers unpack two values, so the empty result must be a pair as
+            # well; the previous bare ``[]`` raised ValueError at the call site.
+            return [], 0
+
+        # Bucket limit converted from MB to bytes.
+        target_bucket_size = self.send_bucket_size_mb * 1024 * 1024
+
+        groups = []
+        current_group = []
+        current_size = 0
+        for meta in self._params_meta:
+            if meta is None:
+                enhanced_print("param_update", None, "Warning: Found None meta in params_meta, skipping")
+                continue
+
+            tensor_size = self._calculate_tensor_size(meta)
+            # Close the current group only when it already holds something, so an
+            # oversized first tensor never produces an empty group.
+            if current_group and current_size + tensor_size > target_bucket_size:
+                groups.append(current_group)
+                current_group = []
+                current_size = 0
+            current_group.append(meta)
+            current_size += tensor_size
+
+        # Flush the trailing, partially filled group.
+        if current_group:
+            groups.append(current_group)
+
+        # Every non-None meta lands in exactly one group.
+        group_tensor_count = sum(len(group) for group in groups)
+        return groups, group_tensor_count
+
+    def _calculate_tensor_size(self, meta):
+        """Return the byte size recorded under meta["size"]; no tensor is inspected."""
+        return meta["size"]
+
+    def async_param_update(self, func_call=None, sync_send=False):
+        """Start the background send thread, then the background receive thread.
+
+        With sync_send=True the call also waits for the send thread to finish.
+        """
+        start_time = time.time()
+        # Make sure the metadata exists so send and recv use the same grouping
+        if not self._params_meta:
+            self.get_params_meta()
+
+        # Without a callback, use a placeholder accepting (named_tensors, version) that returns True
+        if func_call is None:
+            func_call = lambda named_tensors, version: True
+
+        # Record start time for later statistics
+        self._param_update_start_time = start_time
+
+        # Start async send
+        self._start_async_send()
+
+        # NOTE(review): fixed 0.1 s pause meant to let the send thread start first; timing-based, not a handshake
+        time.sleep(0.1)
+
+        # Start async receive
+        self._start_async_recv(func_call)
+
+        if sync_send:
+            self.wait_for_send_complete()
+
+    def wait_for_send_complete(self):
+        """Block until the async send thread finishes; no-op if none was started or it already ended."""
+        if hasattr(self, '_async_send_thread') and self._async_send_thread.is_alive():
+            self._async_send_thread.join()
+
+    def _start_async_send(self):
+        """Spawn the daemon send-worker thread unless a previous one is still running."""
+        if hasattr(self, '_async_send_thread') and self._async_send_thread.is_alive():
+            return  # previous send still in flight; keep the running worker
+        self._async_send_thread = threading.Thread(target=self._async_send_worker, daemon=True)
+        self._async_send_thread.start()
+
+    def _start_async_recv(self, func_call):
+        """Spawn the daemon recv-worker thread with ``func_call`` unless a previous one is still running."""
+        if hasattr(self, '_async_recv_thread') and self._async_recv_thread.is_alive():
+            return  # previous receive still in flight; keep the running worker
+        self._async_recv_thread = threading.Thread(target=self._async_recv_worker, args=(func_call,), daemon=True)
+        self._async_recv_thread.start()
+
+    @torch.no_grad()
+    def _async_send_worker(self):
+        """Send-thread body: publish every parameter bucket under a new version.
+
+        Every caller first runs ``_get_params_iter_bucketed_enhanced``; only the
+        train master node then bumps ``current_version`` and pushes the buckets
+        one by one through ``_async_send_bucket``. Progress and failures are
+        reported with ``enhanced_print``; nothing is returned.
+        """
+        enhanced_print("param_update", None, "Async send worker started")
+        t1 = time.time()
+
+        if not self._params_meta:
+            self.get_params_meta()
+
+        # Yields the name-keyed tensor map together with the bucket grouping.
+        global_tensor_map, groups, group_tensor_count = self._get_params_iter_bucketed_enhanced(self.target_device, bucket_size_mb=self.send_bucket_size_mb)
+
+        if self.is_train_master_node() and (not global_tensor_map or not groups):
+            enhanced_print("param_update", None, "Async send: no tensor data or groups available")
+            return
+
+        if not self.is_train_master_node():
+            # Nodes that do not send are finished once the call above has run.
+            return
+
+        # Every send round advances the shared version counter exactly once.
+        version = self.current_version + 1
+        self.current_version = version
+
+        enhanced_print("param_update", None, f"Async send: processing {len(groups)} buckets (version:{version}) with {len(global_tensor_map)} tensors")
+        success_count = 0
+        for i, group in enumerate(groups):
+            bucket_name = f"bucket_{i}"
+
+            # Keep only the tensors of this group that are present in the map.
+            bucket_tensors = [
+                (meta["name"], global_tensor_map[meta["name"]])
+                for meta in group
+                if meta["name"] in global_tensor_map
+            ]
+            found_count = len(bucket_tensors)
+
+            if not bucket_tensors:
+                enhanced_print("param_update", None, f"Async send: no tensors found for {bucket_name}")
+                continue
+
+            if self._async_send_bucket(bucket_tensors, 0, bucket_name, version=version):
+                success_count += 1
+            else:
+                enhanced_print("param_update", None, f"Async send: failed {bucket_name} (version {version})")
+
+            if self.verbose_logging:
+                enhanced_print("param_update", None, f"Async send: {bucket_name} - {found_count}/{len(group)} tensors sent")
+
+        t2 = time.time()
+        enhanced_print("param_update", None, f"Async send: completed {success_count}/{len(groups)} buckets, cost time:{t2-t1:.2f}, worker thread ending")
+
+    @torch.no_grad()
+    def _async_recv_worker(self, func_call):
+        """Recv-thread body: consume one queue entry per bucket and apply it.
+
+        Only the generation master node receives; other nodes return ``True``
+        immediately. Buckets must arrive in the order the sender published them.
+
+        Args:
+            func_call: Callback forwarded to ``_async_recv_bucket_with_store_ref``.
+
+        Raises:
+            AssertionError: If a queue entry names a different bucket than expected.
+        """
+        enhanced_print("param_update", None, "Async recv worker started")
+        t1 = time.time()
+
+        if not self.is_generation_master_node():
+            # Nothing to receive on this node.
+            return True
+
+        if not self._params_meta:
+            self.get_params_meta()
+
+        # Only the grouping is needed here; the first return value is discarded.
+        _, groups, group_tensor_count = self._get_params_iter_bucketed_enhanced(self.target_device, bucket_size_mb=self.send_bucket_size_mb)
+        if not groups:
+            enhanced_print("param_update", None, "Async recv: no groups available")
+            return
+
+        # Fallback version; replaced by the version carried in each queue entry.
+        version = self.current_version
+        thread_id = threading.get_ident()
+
+        enhanced_print("param_update", None, f"Async recv: processing {len(groups)} buckets ({group_tensor_count} tensors)")
+        success_count = 0
+        for i, group in enumerate(groups):
+            bucket_name = f"bucket_{i}"
+            store_ref = None
+            if self.store_refs_queue is not None:
+                # Blocking get without timeout: waits until the next entry is published.
+                queue_data = self.store_refs_queue[self.engine_idx()].get()
+                if self.verbose_logging:
+                    enhanced_print("param_update", None, f"Async recv: got queue_data for {queue_data.get('bucket_name') if queue_data else 'None'}, expecting {bucket_name}")
+
+                expected_bucket_name = bucket_name
+                actual_bucket_name = queue_data.get('bucket_name') if queue_data else None
+                if actual_bucket_name != expected_bucket_name:
+                    # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
+                    raise AssertionError(f"ERROR: i:{i}, thread id: {thread_id}, rank:{self.rank}, engine_idx:{self.engine_idx()} Bucket order mismatch! Expected {expected_bucket_name}, got {actual_bucket_name}, exit")
+
+                bucket_name = actual_bucket_name
+                store_ref = queue_data.get('object_refs') if queue_data else None
+                received_version = queue_data.get('version') if queue_data else None
+                if received_version is not None:
+                    version = received_version
+                if self.verbose_logging:
+                    # The sender publishes a single object ref, which has no ``len()``.
+                    ref_count = len(store_ref) if hasattr(store_ref, '__len__') else int(store_ref is not None)
+                    enhanced_print("param_update", None, f"i:{i}, thread id: {thread_id}, rank:{self.rank}, engine_idx:{self.engine_idx()}, Async recv: got {ref_count} object_refs for {bucket_name} (version {version})")
+
+            received_tensors = self._async_recv_bucket_with_store_ref(store_ref, version, bucket_name, func_call, group_tensor_count)
+            if received_tensors:
+                success_count += 1
+            else:
+                enhanced_print("param_update", None, f"i:{i}, rank:{self.rank} Async recv: failed {bucket_name} (version {version})")
+
+            if self.verbose_logging:
+                enhanced_print("param_update", None, f"Async recv: {bucket_name} processed")
+
+        t2 = time.time()
+        # Send and recv run independently, so adopt the version that came from the sender.
+        self.current_version = version
+        enhanced_print("param_update", None, f"Async recv: completed {success_count}/{len(groups)} buckets(version:{version}), cost time:{t2-t1:.2f}, worker thread ending")
+
+    def _async_send_bucket(self, bucket_tensors, ray_rank, bucket_name, version=None):
+        """Publish one bucket of tensors through the Ray object store.
+
+        The bucket is moved to CPU, stored with ``cross_process_ray_put`` and the
+        resulting object ref is announced on every queue in ``store_refs_queue``
+        together with the bucket name and version.
+
+        Args:
+            bucket_tensors: List of ``(name, tensor)`` pairs making up the bucket.
+            ray_rank: Accepted for interface compatibility; not used by this method.
+            bucket_name: Label the receiver uses to check bucket ordering.
+            version: Version tag for the payload; ``1`` is used when ``None``.
+
+        Returns:
+            ``False`` if ``bucket_tensors`` is empty, otherwise ``True``.
+
+        Note:
+            ``True`` is also returned when ``store_refs_queue`` is ``None``; the
+            payload is then stored but not announced to any receiver.
+        """
+        if self.verbose_logging:
+            enhanced_print("param_update", None, f"Async send: starting {bucket_name} with {len(bucket_tensors)} tensors")
+
+        # Fall back to version 1 when the caller does not supply one.
+        if version is None:
+            version = 1
+
+        # An empty bucket is reported to the caller as a failure.
+        if not bucket_tensors:
+            enhanced_print("param_update", None, f"ERROR: bucket_tensors is empty for {bucket_name}")
+            return False
+
+        # The payload goes through ray.put, so bring every tensor to CPU first.
+        # Testing for "not cpu" instead of only "cuda" also moves tensors that live
+        # on other accelerator backends; CPU tensors are passed through untouched.
+        cpu_tensors = []
+        for name, tensor in bucket_tensors:
+            if tensor.device.type != 'cpu':
+                tensor = tensor.cpu()
+            cpu_tensors.append((name, tensor))
+
+        def put_payload(payload):
+            """Store ``payload`` with ``cross_process_ray_put``; when verbose, log the put time."""
+            start_time = time.time()
+            ref = cross_process_ray_put(payload, version=version)
+            put_time = time.time() - start_time
+            if self.verbose_logging:
+                enhanced_print("param_update", None, f"Async send: {bucket_name} - put={put_time*1000:.1f}ms")
+            return ref
+
+        # The whole [(name, tensor)] list travels as a single object.
+        object_refs = put_payload(cpu_tensors)
+
+        # Announce the ref on every queue so each receiver finds its own message.
+        if self.store_refs_queue is not None and object_refs:
+            # TODO: one put per queue; this could be optimized.
+            for queue_idx in range(len(self.store_refs_queue)):
+                # Message read by the receive worker: it checks 'bucket_name',
+                # adopts 'version' and fetches the payload behind 'object_refs'.
+                self.store_refs_queue[queue_idx].put({
+                    'bucket_name': bucket_name,
+                    'version': version,
+                    'object_refs': object_refs
+                })
+
+        if self.verbose_logging:
+            enhanced_print("param_update", None, f"Async send: {bucket_name} sent successfully")
+
+        return True
+
+    def _execute_async_func_call(self, func_call, named_tensors):
+        """Invoke ``func_call`` with ``named_tensors`` as its only argument and
+        return whatever it returns."""
+        return func_call(named_tensors)
+
+    def _async_recv_bucket_with_store_ref(self, store_ref, version, bucket_name, func_call, group_tensor_count):
+        """Fetch one bucket via ``cross_process_ray_get`` and hand it to ``func_call``.
+
+        Args:
+            store_ref: Object ref published by the sender for this bucket.
+            version: Version tag passed through to ``func_call`` and used in logs.
+            bucket_name: Bucket label, used for logging only.
+            func_call: Callback invoked as ``func_call(named_tensors, version, group_tensor_count)``.
+            group_tensor_count: Forwarded unchanged to ``func_call``.
+
+        Returns:
+            ``False`` when there is no ref or no data; otherwise the callback's
+            result, or ``True`` if no callback was supplied.
+        """
+        if not store_ref:
+            enhanced_print("param_update", None, f"Async recv: no store_ref provided for {bucket_name}")
+            return False
+
+        get_start_time = time.time()
+        received_tensors = cross_process_ray_get(store_ref)  # [(name, tensor)]
+        get_time = time.time() - get_start_time
+
+        if not received_tensors:
+            enhanced_print("param_update", None, f"Async recv: no data available for version {version}")
+            return False
+
+        if self.verbose_logging:
+            enhanced_print("param_update", None, f"Async recv: Ray get {len(received_tensors)} tensors completed for {bucket_name} (version {version}) in {get_time:.3f}s")
+
+        if not func_call:
+            return True
+        return func_call(received_tensors, version, group_tensor_count)
\ No newline at end of file
diff --git a/verl/workers/param_update/ray_async_communication.py b/verl/workers/param_update/ray_async_communication.py
new file mode 100644
index 00000000000..a81929c8353
--- /dev/null
+++ b/verl/workers/param_update/ray_async_communication.py
@@ -0,0 +1,65 @@
+"""
+Asynchronous communication manager based on Ray put/get
+Coordinate with multi-threading to achieve overlap, avoid distributed communication deadlock
+"""
+
+import threading
+import time
+import torch
+import ray
+from typing import Dict, List, Optional, Any, Union
+from enum import Enum, auto
+import queue
+import pickle
+
+
+def cross_process_ray_put(data, version: int = None):
+    """Store ``data`` in the Ray object store and return its object ref.
+
+    ``data`` is wrapped in a one-element list before ``ray.put`` (Ray does not
+    accept putting an object ref directly); ``cross_process_ray_get`` strips the
+    wrapper again. ``version`` is accepted for call-site compatibility and is
+    not used here.
+    """
+    return ray.put([data])
+
+def cross_process_ray_get(object_ref):
+    """Fetch the value stored by ``cross_process_ray_put`` for ``object_ref``.
+
+    Calls ``ray.get`` and strips the one-element list wrapper added on the put
+    side. The object ref itself is not released or cleaned up here.
+    """
+    payload = ray.get(object_ref)[0]
+    return payload
+
+def create_store_refs_queue():
+    """Create a bounded (``maxsize=10``) ``ray.util.queue.Queue`` for passing store refs."""
+    from ray.util.queue import Queue  # explicit: a bare ``import ray`` may not load this submodule
+    return Queue(maxsize=10)
+
+def put_store_refs_to_queue(queue, store_refs):
+    """Put ``store_refs`` on ``queue`` without draining what is already queued.
+
+    Returns:
+        ``True`` if the put succeeded, ``False`` if it raised (the error is printed).
+    """
+    # Older entries are intentionally kept; the new one is only appended.
+    try:
+        queue.put(store_refs)
+    except Exception as e:
+        print(f"[StoreRefsQueue] Failed to put store refs: {e}")
+        return False
+    return True
+
+def get_store_refs_from_queue(queue):
+    """Take the next store-refs entry from ``queue`` via a plain ``queue.get()``.
+
+    Returns the entry, or ``None`` if ``queue.get`` raised (the error is printed).
+    """
+    try:
+        return queue.get()
+    except Exception as e:
+        print(f"[StoreRefsQueue] Failed to get store refs: {e}")
+        return None
+
+ 
\ No newline at end of file
diff --git a/verl/workers/rollout/sglang_rollout/dual_buffer_engine.py b/verl/workers/rollout/sglang_rollout/dual_buffer_engine.py
new file mode 100644
index 00000000000..e392d2f25b1
--- /dev/null
+++ b/verl/workers/rollout/sglang_rollout/dual_buffer_engine.py
@@ -0,0 +1,585 @@
+"""
+Real dual-buffer SGLang engine implementation
+
+Fix current dual-buffer implementation issues:
+1. Support two independent weight copies
+2. Update weights without affecting current active buffer
+3. Support atomic buffer switching
+"""
+
+import asyncio
+import os
+from typing import List, Tuple, Optional, Dict, Any
+import time
+import torch
+import threading
+import pickle
+
+import sglang.srt.entrypoints.engine
+from sglang.srt.server_args import ServerArgs
+from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.entrypoints.engine import UpdateWeightsFromTensorReqInput
+from sglang.srt.model_executor.model_runner import LocalSerializedTensor
+from sglang.srt.utils import (
+    MultiprocessingSerializer,
+)
+from sglang.srt.managers.tokenizer_manager import (
+    ReleaseMemoryOccupationReqInput,
+    ResumeMemoryOccupationReqInput,
+    UpdateWeightsFromTensorReqInput,
+)
+
+from verl.trainer.ppo.pipeline.pipeline_utils import enhanced_print
+
+
+class _BufferManager:
+    """Buffer index/version/update bookkeeping for a dual-buffer engine.
+
+    The manager keeps no weight storage of its own: it reads and writes the
+    owning engine's ``_buffer_weights``, ``_buffer_metas``, ``_buffer_ready``,
+    ``_buffer_versions`` and ``_active_buffer`` attributes directly.
+    """
+
+    def __init__(self, engine, bucket_size_mb=None):
+        """Bind to ``engine`` and choose the batch size used when building payloads.
+
+        Args:
+            engine: Owning engine exposing the ``_buffer_*`` lists, ``_active_buffer``,
+                ``server_args`` and ``set_version``.
+            bucket_size_mb: Batch size in MB; when ``None`` it is read from the
+                ``PARAM_UPDATE_BUFFER_BUCKET_SIZE_MB`` environment variable (default 128).
+        """
+        self._e = engine
+        self._use_reqinput = True  # wrap payloads in UpdateWeightsFromTensorReqInput
+        self._use_torch_fd = True  # MultiprocessingSerializer when True, pickle when False
+        self._use_batch_serialize = True  # serialize whole batches when the payload is built
+        self._use_reqinput_prefetch = False  # build the payload already in the recv stage; WIP
+        # One pool per buffer: per-name serialized data in per-tensor mode, plus the
+        # "_needs_reserialize" / "_reqinput" bookkeeping entries used in batch mode.
+        self._serialized_pool = [dict(), dict()]
+
+        if bucket_size_mb is not None:
+            self._batch_size_mb = bucket_size_mb
+        else:
+            self._batch_size_mb = int(os.environ.get('PARAM_UPDATE_BUFFER_BUCKET_SIZE_MB', '128'))
+        self.tp_size = getattr(self._e.server_args, 'tp_size', 1)
+        enhanced_print("BufferManager", None, f"Initialized with bucket_size_mb={self._batch_size_mb}, tp_size={self.tp_size}")
+
+    def get_bucket_size_mb(self) -> int:
+        """Return the current batch size setting in MB."""
+        return self._batch_size_mb
+
+    def set_bucket_size_mb(self, bucket_size_mb: int):
+        """Replace the batch size setting (MB) and log the new value."""
+        self._batch_size_mb = bucket_size_mb
+        enhanced_print("BufferManager", None, f"Updated bucket_size_mb to {bucket_size_mb}")
+
+    def target_for_update(self) -> int:
+        """Return the buffer index the next update should be written to.
+
+        The active buffer is used while it is not ready yet; otherwise the other one.
+        """
+        if not self._e._buffer_ready[self._e._active_buffer]:
+            return self._e._active_buffer
+        return 1 - self._e._active_buffer
+
+    def _ensure_buffer_dict(self, buf_idx: int) -> None:
+        """Create the weight and meta dicts of ``buf_idx`` if they are still ``None``.
+
+        An out-of-range index is logged and otherwise ignored.
+        """
+        if buf_idx < 0 or buf_idx >= len(self._e._buffer_weights):
+            enhanced_print("DualBufferAsyncEngine", None, f"Invalid buffer index: {buf_idx}, valid range: 0-{len(self._e._buffer_weights)-1}")
+            return
+
+        if self._e._buffer_weights[buf_idx] is None:
+            self._e._buffer_weights[buf_idx] = {}
+        if self._e._buffer_metas[buf_idx] is None:
+            self._e._buffer_metas[buf_idx] = {}
+
+    def _serialize_named_tensors(self, named_tensors):
+        """Serialize every ``(name, tensor)`` pair once per tensor-parallel rank.
+
+        Returns:
+            ``[(name, [serialized, ...])]`` with ``tp_size`` entries per name.
+        """
+        serialized_list = []
+        for name, tensor in named_tensors:
+            cpu_tensor = tensor.detach().cpu().contiguous()
+            ser_per_tp = []
+            for _ in range(self.tp_size):
+                # In batch mode each rank gets its own clone so every serialization
+                # works on independent storage.
+                rank_tensor = cpu_tensor.clone() if self._use_batch_serialize else cpu_tensor
+                if self._use_torch_fd:
+                    serialized = MultiprocessingSerializer.serialize([(name, rank_tensor)])
+                else:
+                    serialized = pickle.dumps([(name, rank_tensor)])
+                ser_per_tp.append(serialized)
+            serialized_list.append((name, ser_per_tp))
+        return serialized_list
+
+    def _create_reqinput_from_serialized(self, serialized_items_per_tp):
+        """Wrap per-rank serialized data (one entry per tensor-parallel rank) in a request object."""
+        return UpdateWeightsFromTensorReqInput(
+            serialized_named_tensors=serialized_items_per_tp,
+            load_format=None,
+            flush_cache=False,
+        )
+
+    def _calculate_tensor_size(self, tensor: torch.Tensor) -> int:
+        """Return the tensor's data size in bytes (``numel * element_size``)."""
+        return int(tensor.numel() * tensor.element_size())
+
+    def _create_batches_by_size(self, items, batch_size_mb: int = None):
+        """Split ``items`` into consecutive batches of at most ``batch_size_mb`` MB.
+
+        Order is preserved. A tensor larger than the limit gets a batch of its own,
+        so a batch can exceed the limit but is never empty.
+
+        Args:
+            items: Iterable of ``(name, tensor)`` pairs.
+            batch_size_mb: Limit in MB; defaults to the manager's setting.
+
+        Returns:
+            List of batches, each a list of ``(name, tensor)`` pairs.
+        """
+        if batch_size_mb is None:
+            batch_size_mb = self._batch_size_mb
+        batch_bytes_limit = int(batch_size_mb * 1024 * 1024)
+        batches = []
+        cur_batch = []
+        cur_bytes = 0
+        for name, tensor in items:
+            t_bytes = self._calculate_tensor_size(tensor)
+            if cur_batch and cur_bytes + t_bytes > batch_bytes_limit:
+                # Adding this tensor would overflow the open batch: close it first.
+                batches.append(cur_batch)
+                cur_batch = []
+                cur_bytes = 0
+            cur_batch.append((name, tensor))
+            cur_bytes += t_bytes
+        if cur_batch:
+            batches.append(cur_batch)
+        return batches
+
+    def register_update(self, named_tensors, version: int, group_tensor_count: int) -> bool:
+        """Accumulate one chunk of tensors into the update-target buffer.
+
+        - A chunk whose version is strictly smaller than the buffer's is rejected.
+        - Equal versions are accepted, so one version may arrive in several chunks.
+        - The buffer is marked ready once it holds at least ``group_tensor_count`` tensors.
+        - ``engine.set_version(version)`` is called for every accepted chunk.
+
+        Args:
+            named_tensors: Dict or iterable of ``(name, tensor)`` pairs.
+            version: Version the chunk belongs to.
+            group_tensor_count: Total number of tensors expected for the version.
+
+        Returns:
+            ``True`` if the chunk was accepted, ``False`` if it was outdated.
+        """
+        target_buffer = self.target_for_update()
+        existing_version = self._e._buffer_versions[target_buffer]
+        if existing_version is not None and version < existing_version:
+            enhanced_print("DualBufferAsyncEngine", None, f"Ignore outdated update: buffer {target_buffer} has version {existing_version} > incoming {version}")
+            return False
+
+        self._ensure_buffer_dict(target_buffer)
+        if isinstance(named_tensors, dict):
+            iterable = list(named_tensors.items())
+        else:
+            iterable = list(named_tensors)
+
+        weights = self._e._buffer_weights[target_buffer]
+        pool = self._serialized_pool[target_buffer]
+        if self._use_batch_serialize:
+            # Store tensors only; serialization is deferred until the payload is built.
+            metas = self._e._buffer_metas[target_buffer]
+            for name, tensor in iterable:
+                weights[name] = tensor.detach().cpu()
+                metas[name] = 1  # presence marker counted by the prefetch check below
+            pool["_needs_reserialize"] = True
+            if self._use_reqinput_prefetch and len(metas) == group_tensor_count:
+                # Last chunk arrived: build the payload now so applying can reuse it.
+                pool["_reqinput"] = self.build_payload_for_apply(target_buffer)
+                pool["_needs_reserialize"] = False
+        else:
+            # Per-tensor mode: serialize right away and keep the result per name.
+            serialized_items = self._serialize_named_tensors(iterable)
+            for (name, tensor), (s_name, s_list) in zip(iterable, serialized_items):
+                assert name == s_name
+                weights[name] = tensor.detach().cpu()
+                pool[name] = s_list  # list with tp_size entries
+
+        # NOTE(review): this counts every tensor stored in the buffer, including any
+        # left over from an earlier version — confirm the engine resets the buffer.
+        if len(weights) >= group_tensor_count:
+            self._e._buffer_ready[target_buffer] = True
+
+        self._e._buffer_versions[target_buffer] = version
+        self._e.set_version(version)
+
+        enhanced_print("DualBufferAsyncEngine", None, f"Registered update: buffer {target_buffer}, version {version}, tensors {len(iterable)}")
+        return True
+
+    def get_buffer_items(self, buf_idx: int):
+        """Return the buffer's ``(name, tensor)`` pairs as a list (empty if unset)."""
+        buf = self._e._buffer_weights[buf_idx]
+        if buf is None:
+            return []
+        return list(buf.items())
+
+    def build_payload_for_apply(self, buf_idx: int):
+        """Build what is sent to the engine when buffer ``buf_idx`` is applied.
+
+        Returns:
+            ``None`` if the buffer is empty; the cached payload if one was built
+            earlier and is still marked current; a list of
+            ``UpdateWeightsFromTensorReqInput`` when ``_use_reqinput`` is set (one
+            per size-limited batch in batch mode, one per tensor otherwise);
+            else the raw ``[(name, tensor)]`` list.
+        """
+        items = self.get_buffer_items(buf_idx)
+        if not items:
+            return None
+
+        # Reuse a cached payload only if it exists and is marked current. Both keys
+        # are absent until register_update has run in batch mode, so they are read
+        # defensively instead of with a plain ``[...]`` lookup (KeyError).
+        pool = self._serialized_pool[buf_idx]
+        if '_reqinput' in pool and not pool.get('_needs_reserialize', True):
+            return pool['_reqinput']
+
+        if not self._use_reqinput:
+            return items
+
+        if self._use_batch_serialize:
+            # Batch mode: several tensors per request, bounded by the batch size.
+            groups = self._create_batches_by_size(items)
+        else:
+            # Per-tensor mode: one request per tensor.
+            groups = [[item] for item in items]
+
+        req_inputs = []
+        for group in groups:
+            ser_list = []
+            for _ in range(self.tp_size):
+                # Serialize afresh for every rank; nothing is cached in _serialized_pool.
+                if not self._use_torch_fd:
+                    serialized = pickle.dumps(group)
+                elif self._use_batch_serialize:
+                    serialized = MultiprocessingSerializer.serialize(list(group))
+                else:
+                    serialized = MultiprocessingSerializer.serialize([(n, t.clone()) for n, t in group])
+                ser_list.append(serialized)
+            req_inputs.append(self._create_reqinput_from_serialized(ser_list))
+        return req_inputs
+
+    def clear_buffer_metas(self, buf_idx: int):
+        """Remove all meta entries of ``buf_idx``; a buffer without a meta dict is left alone."""
+        metas = self._e._buffer_metas[buf_idx]
+        if metas is not None:
+            metas.clear()
+
+    def get_stats(self) -> dict:
+        """Return a snapshot of the manager's settings and the engine's buffer state."""
+        return {
+            "bucket_size_mb": self._batch_size_mb,
+            "tp_size": self.tp_size,
+            "use_batch_serialize": self._use_batch_serialize,
+            "use_torch_fd": self._use_torch_fd,
+            "use_reqinput": self._use_reqinput,
+            "buffer_ready": self._e._buffer_ready.copy(),
+            "buffer_versions": self._e._buffer_versions.copy(),
+            "active_buffer": self._e._active_buffer
+        }
+
+
+class DualBufferAsyncEngine:
+    """Factory for a dual-buffer SGLang engine that reuses ``AsyncEngine`` logic.
+
+    ``DualBufferAsyncEngine(**kwargs)`` does not return an instance of this
+    class. ``__new__`` imports ``AsyncEngine`` lazily, defines a subclass of
+    it, and returns an instance of that subclass. The subclass keeps two
+    weight buffers (indices 0 and 1), a ready flag and a version per buffer,
+    and the index of the buffer that is currently active.
+    """
+
+    def __new__(cls, **kwargs):
+        """Build the ``AsyncEngine`` subclass and return an instance of it.
+
+        Args:
+            **kwargs: Forwarded to ``AsyncEngine.__init__``, except for
+                ``bucket_size_mb`` which is handed to the buffer manager.
+        """
+        # Lazy import AsyncEngine to avoid circular reference
+        from .sglang_rollout import AsyncEngine
+
+        class DualBufferAsyncEngineImpl(AsyncEngine):
+            """``AsyncEngine`` extended with dual-buffer weight bookkeeping."""
+
+            def __init__(self, **kwargs):
+                # bucket_size_mb is consumed here rather than forwarded to
+                # AsyncEngine. Popping with a default keeps the name bound
+                # even when the caller does not pass it.
+                bucket_size_mb = kwargs.pop('bucket_size_mb', None)
+
+                super().__init__(**kwargs)
+
+                # Dual-buffer state
+                self._active_buffer = 0  # Current active buffer (0 or 1)
+                self._buffer_ready = [False, False]  # Ready state of two buffers
+                self._buffer_weights = [None, None]  # Weights of two buffers
+                self._buffer_metas = [None, None]
+                self._buffer_versions = [None, None]  # Version numbers of two buffers
+                # threading.RLock rather than asyncio.Lock: it can be shared
+                # across threads and re-acquired by the thread that holds it.
+                self._update_lock = threading.RLock()
+
+                # Version management
+                self._current_version = 0  # Current active version
+                self._latest_version = 0   # Latest available version
+
+                # Default need reload
+                self._need_reload = True
+
+                # Buffer manager encapsulating buffer ops
+                self._bufman = _BufferManager(self, bucket_size_mb=bucket_size_mb)
+
+            def update_weights_from_tensor_sync(self, named_tensors, update_weights_func_call, load_format=None, flush_cache=True, target_buffer=None, version=None):
+                """Store weights in a buffer, apply them to the engine, and switch on success.
+
+                Args:
+                    named_tensors: Weights to store; must provide ``.copy()``.
+                    update_weights_func_call: Callable returning a coroutine that
+                        applies the weights to the engine.
+                    load_format: Accepted but not used by this method.
+                    flush_cache: Accepted but not used by this method.
+                    target_buffer: Buffer index to fill; defaults to the
+                        non-active buffer.
+                    version: Version to record; defaults to
+                        ``_latest_version + 1`` (which also advances
+                        ``_latest_version``).
+
+                Returns:
+                    The result of applying the weights (falsy on failure).
+                """
+                with self._update_lock:
+                    # Determine target buffer: default is the non-active one
+                    if target_buffer is None:
+                        target_buffer = 1 - self._active_buffer
+
+                    # Determine version number
+                    if version is None:
+                        version = self._latest_version + 1
+                        self._latest_version = version
+
+                    enhanced_print("DualBufferAsyncEngine", None, f"Updating buffer {target_buffer} (active: {self._active_buffer}) for version {version}")
+
+                    # Record weights, readiness and version before applying
+                    self._buffer_weights[target_buffer] = named_tensors.copy()
+                    self._buffer_ready[target_buffer] = True
+                    self._buffer_versions[target_buffer] = version
+
+                    enhanced_print("DualBufferAsyncEngine", None, f"Buffer {target_buffer} updated successfully for version {version}")
+
+                    # Actually push the weights into the engine
+                    success = self._apply_weights_to_engine_sync(named_tensors, update_weights_func_call)
+
+                    if success:
+                        # Switch to new updated buffer
+                        self._active_buffer = target_buffer
+                        self._current_version = version
+                        self._bufman.clear_buffer_metas(target_buffer)  # Clear buffer reference
+                        enhanced_print("DualBufferAsyncEngine", None, f"Switched to buffer {target_buffer} for version {version}")
+                    else:
+                        enhanced_print("DualBufferAsyncEngine", None, f"ERROR: Failed to apply weights to engine for version {version}")
+
+                    return success
+
+            def _register_and_update_buffer(self, target_buffer, named_tensors: Dict[str, torch.Tensor]):
+                """Merge ``named_tensors`` into the weight dict of ``target_buffer``.
+
+                ``named_tensors`` may be a dict or an iterable of
+                ``(name, tensor)`` pairs. Existing entries are kept, so
+                repeated calls accumulate into the same buffer.
+                """
+                # Only create the dict on first use to avoid dropping accumulated content
+                if self._buffer_weights[target_buffer] is None:
+                    self._buffer_weights[target_buffer] = {}
+
+                # Unified iteration interface for both accepted input shapes
+                if isinstance(named_tensors, dict):
+                    iterable = named_tensors.items()
+                else:
+                    iterable = named_tensors
+
+                for name, tensor in iterable:
+                    self._buffer_weights[target_buffer][name] = tensor
+
+            def set_params_meta(self, params_meta):
+                """Store ``params_meta`` on the engine as ``_buffer_meta``.
+
+                NOTE(review): this writes ``_buffer_meta`` (singular), which is a
+                different attribute from the per-buffer ``_buffer_metas`` list
+                - confirm which one the buffer manager reads.
+                """
+                self._buffer_meta = params_meta
+
+            def _get_buffer(self, target_buffer):
+                """Return the buffer as a list of ``(name, tensor)`` pairs.
+
+                Returns ``None`` when the buffer has never been filled, so
+                callers can tell "no buffer" apart from "empty buffer".
+                """
+                buffer = self._buffer_weights[target_buffer]
+                if buffer is None:
+                    return None
+                return list(buffer.items())
+
+            def wait_for_buffer_write(self):
+                """Block until the active buffer is flagged ready.
+
+                Polls every 0.1 s with no timeout and without taking the update
+                lock; another thread is expected to set the ready flag.
+                """
+                target_buffer = self._active_buffer
+                enhanced_print("DualBufferAsyncEngine", None, f"Waiting for buffer {target_buffer} to be ready...")
+                while not self._buffer_ready[target_buffer]:
+                    # wait for update_buffer_data_only update target_buffer;
+                    time.sleep(0.1)
+
+            def update_buffer_data_only(self, named_tensors, version, group_tensor_count):
+                """Only update buffer data, without applying weights - used by the recv thread.
+
+                The work is delegated to the buffer manager's ``register_update``
+                under the update lock. Per the original author's notes the
+                manager always writes to the non-active buffer, accepts
+                incremental updates for an equal version, rejects a strictly
+                smaller version and advances ``_latest_version`` (not
+                verifiable from this method alone).
+
+                Returns:
+                    Whatever ``register_update`` returns.
+                """
+                with self._update_lock:
+                    return self._bufman.register_update(named_tensors, version, group_tensor_count)
+
+            def _run_async_in_sync_context(self, coro):
+                """Run ``coro`` to completion from synchronous code and return its result.
+
+                The coroutine is driven exactly once. Exceptions raised by the
+                coroutine propagate to the caller.
+                """
+                import asyncio
+                import concurrent.futures
+
+                try:
+                    asyncio.get_running_loop()
+                except RuntimeError:
+                    pass  # No loop is running in this thread; run the coroutine here.
+                else:
+                    # A loop is already running in this thread and a second loop
+                    # cannot be driven from the same thread, so hand the
+                    # coroutine to a helper thread that owns a private loop.
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                        return pool.submit(asyncio.run, coro).result()
+
+                try:
+                    loop = asyncio.get_event_loop()
+                except RuntimeError:
+                    loop = None  # This thread has no event loop yet
+                if loop is None or loop.is_closed():
+                    # Install a fresh loop and leave it open so that later calls
+                    # from this thread reuse it instead of finding a closed loop.
+                    loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(loop)
+                return loop.run_until_complete(coro)
+
+            def _apply_weights_to_engine_sync(self, weights, update_weights_func_call):
+                """Apply ``weights`` to the engine through ``update_weights_func_call``.
+
+                ``update_weights_func_call(weights, use_reqinput=...)`` must return
+                a coroutine; it is run synchronously.
+
+                Returns:
+                    The coroutine's result, or ``False`` when anything raised
+                    (the error and traceback are printed).
+                """
+                try:
+                    return self._run_async_in_sync_context(
+                        update_weights_func_call(
+                            weights,
+                            use_reqinput=self._bufman._use_reqinput
+                        )
+                    )
+                except Exception as e:
+                    print(f"[DualBufferAsyncEngine] ERROR: Failed to apply weights to AsyncEngine: {e}")
+                    import traceback
+                    traceback.print_exc()
+                    return False
+
+            def get_current_version(self):
+                """Get current active version number"""
+                with self._update_lock:
+                    return self._current_version
+
+            def get_latest_version(self):
+                """Get latest available version number"""
+                with self._update_lock:
+                    return self._latest_version
+
+            def set_version(self, version):
+                """Raise ``_latest_version`` to ``version`` if it is larger.
+
+                The latest version is never lowered and the current version is
+                not rolled back; a ``None`` or negative current version is
+                reset to 0. Always returns ``True``.
+                """
+                with self._update_lock:
+                    if version > self._latest_version:
+                        self._latest_version = version
+                    # Avoid rollback current version
+                    if self._current_version is None or self._current_version < 0:
+                        self._current_version = 0
+                    return True
+
+            def execute_update_weights_before_generate(self, update_weights_func_call):
+                """Switch the engine to the latest buffered version before generating.
+
+                Returns:
+                    ``True`` when the switch succeeded or when the latest
+                    version is older than the current one; ``False`` when no
+                    buffer holds the latest version or the switch failed.
+                """
+                with self._update_lock:
+                    enhanced_print("DualBufferAsyncEngine", None, f"Current version: {self._current_version}, Latest version: {self._latest_version}")
+
+                    # ">=" on purpose: an equal version is still (re)applied
+                    if self._latest_version < self._current_version:
+                        enhanced_print("DualBufferAsyncEngine", None, f"No new version available, current version: {self._current_version}")
+                        return True
+
+                    # Find buffer containing latest version. Readiness is not
+                    # checked here; switch_to_buffer_sync rejects unready buffers.
+                    target_buffer = None
+                    for buffer_id in range(2):
+                        if self._buffer_versions[buffer_id] == self._latest_version:
+                            target_buffer = buffer_id
+                            break
+
+                    if target_buffer is None:
+                        enhanced_print("DualBufferAsyncEngine", None, f"Latest version {self._latest_version} not found in any ready buffer")
+                        return False
+
+                    # Switch to new version
+                    if not self.switch_to_buffer_sync(target_buffer, update_weights_func_call):
+                        enhanced_print("DualBufferAsyncEngine", None, f"Failed to switch to version {self._latest_version}")
+                        return False
+
+                    self._current_version = self._latest_version
+                    enhanced_print("DualBufferAsyncEngine", None, f"Successfully switched to buffer {target_buffer} for version {self._latest_version}")
+                    return True
+
+            def switch_to_buffer_sync(self, buffer_id=-1, update_weights_func_call=None):
+                """Apply the weights held in ``buffer_id`` to the engine and make it active.
+
+                Args:
+                    buffer_id: Buffer index (0 or 1). The default ``-1`` selects
+                        the buffer whose recorded version equals
+                        ``_latest_version``.
+                    update_weights_func_call: Callable returning a coroutine that
+                        applies the payload to the engine.
+
+                Returns:
+                    ``True`` if the weights were applied and the buffer became
+                    active, ``False`` otherwise (state is left unchanged).
+                """
+                with self._update_lock:
+                    if buffer_id == -1:
+                        # Resolve the sentinel to a buffer index; the latest
+                        # version number itself is not a valid index.
+                        latest = self._latest_version
+                        buffer_id = next(
+                            (idx for idx, ver in enumerate(self._buffer_versions) if ver == latest),
+                            None,
+                        )
+                        if buffer_id is None:
+                            enhanced_print("DualBufferAsyncEngine", None, f"Latest version {latest} not found in any buffer, cannot switch")
+                            return False
+
+                    if not self._buffer_ready[buffer_id]:
+                        enhanced_print("DualBufferAsyncEngine", None, f"Buffer {buffer_id} not ready, cannot switch")
+                        return False
+
+                    # The weights are applied even when buffer_id is already the
+                    # active buffer, so the very first switch is not skipped.
+                    # active_buffer and current_version only change after a
+                    # successful application.
+                    t1 = time.time()
+                    buffer = self._get_buffer(buffer_id)
+                    if buffer is None:
+                        enhanced_print("DualBufferAsyncEngine", None, f"Buffer {buffer_id} is empty, cannot switch")
+                        return False
+
+                    payload = self._bufman.build_payload_for_apply(buffer_id)
+                    t2 = time.time()
+
+                    if payload is None:
+                        enhanced_print("DualBufferAsyncEngine", None, f"Build payload failed for buffer {buffer_id}")
+                        return False
+
+                    success = self._apply_weights_to_engine_sync(payload, update_weights_func_call)
+
+                    t3 = time.time()
+                    # Example from a run: build payload 35.02 s, apply weights 12.09 s
+                    enhanced_print("DualBufferAsyncEngine", None, f"Build payload for buffer {buffer_id} took {t2-t1:.2f} s, apply weights took {t3-t2:.2f} s")
+
+                    if not success:
+                        enhanced_print("DualBufferAsyncEngine", None, f"Failed to apply weights from buffer {buffer_id} to engine; not switching")
+                        return False
+
+                    # Switch to new buffer and update current version
+                    self._active_buffer = buffer_id
+                    self._current_version = self._buffer_versions[buffer_id]
+                    self._bufman.clear_buffer_metas(buffer_id)  # Clear buffer reference
+                    enhanced_print("DualBufferAsyncEngine", None, f"Successfully applied and switched to buffer {buffer_id} for version {self._current_version}")
+                    return True
+
+            def get_stats(self) -> dict:
+                """Get dual-buffer engine statistics (list values are copies)."""
+                return {
+                    "active_buffer": self._active_buffer,
+                    "buffer_ready": self._buffer_ready.copy(),
+                    "buffer_versions": self._buffer_versions.copy(),
+                    "current_version": self._current_version,
+                    "latest_version": self._latest_version,
+                    "need_reload": self._need_reload,
+                    "buffer_manager": self._bufman.get_stats()
+                }
+
+            def get_bucket_size_mb(self) -> int:
+                """Get current bucket size setting from the buffer manager."""
+                return self._bufman.get_bucket_size_mb()
+
+            def set_bucket_size_mb(self, bucket_size_mb: int):
+                """Set bucket size on the buffer manager."""
+                self._bufman.set_bucket_size_mb(bucket_size_mb)
+
+        # Return new instance
+        return DualBufferAsyncEngineImpl(**kwargs)
diff --git a/verl/workers/rollout/sglang_rollout/sglang_rollout.py b/verl/workers/rollout/sglang_rollout/sglang_rollout.py
index a684021c439..59bba88a13f 100644
--- a/verl/workers/rollout/sglang_rollout/sglang_rollout.py
+++ b/verl/workers/rollout/sglang_rollout/sglang_rollout.py
@@ -22,7 +22,7 @@
 import time
 from copy import deepcopy
 from json import JSONDecodeError
-from typing import Any, Optional
+from typing import Any, Optional, List, Tuple
 from uuid import uuid4
 
 import numpy as np
@@ -37,6 +37,7 @@
 from sglang.srt.sampling.sampling_params import SamplingParams
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import (
+    MultiprocessingSerializer,
     assert_pkg_version,
     get_ip,
     get_open_port,
@@ -156,6 +157,27 @@ async def resume_memory_occupation(self, tags: Optional[list[str]] = None):
     async def update_weights_from_tensor(self, update_weights_request: UpdateWeightsFromTensorReqInput):
         return await self.tokenizer_manager.update_weights_from_tensor(update_weights_request, None)
 
+    async def update_weights_from_tensor_legacy(
+        self,
+        named_tensors: List[Tuple[str, torch.Tensor]],  # noqa: UP006
+        load_format: Optional[str] = None,
+        flush_cache: bool = True,
+    ):
+        """Send ``named_tensors`` to the tokenizer manager as one weight-update request.
+
+        The tensors are serialized once per tensor-parallel rank, i.e.
+        ``self.server_args.tp_size`` times. If more updates are going to
+        follow, pass ``flush_cache=False`` to avoid duplicated cache cleaning.
+
+        Returns:
+            Whatever ``tokenizer_manager.update_weights_from_tensor`` returns.
+        """
+        tp_size = self.server_args.tp_size
+        # One independent serialization per TP rank
+        per_rank_payloads = [MultiprocessingSerializer.serialize(named_tensors) for _ in range(tp_size)]
+        request = UpdateWeightsFromTensorReqInput(
+            serialized_named_tensors=per_rank_payloads,
+            load_format=load_format,
+            flush_cache=flush_cache,
+        )
+        response = await self.tokenizer_manager.update_weights_from_tensor(request, None)
+        return response
+
+    async def update_weights_from_reqinput(
+        self,
+        obj: UpdateWeightsFromTensorReqInput,
+    ):
+        """Forward an already-built weight-update request to the tokenizer manager.
+
+        Returns:
+            Whatever ``tokenizer_manager.update_weights_from_tensor`` returns.
+        """
+        manager = self.tokenizer_manager
+        response = await manager.update_weights_from_tensor(obj, None)
+        return response
+
     async def flush_cache(self):
         return await self.tokenizer_manager.flush_cache()
 
@@ -219,15 +241,17 @@ def _map_each_response(resp):
     batched_output_token_ids = []
     batched_logprobs = []
     for output_token_ids, log_probs in out_map:
-        batched_output_token_ids.append(output_token_ids)
-        batched_logprobs.append(log_probs)
+        # Check whether the tensor is empty
+        if output_token_ids.numel() > 0:
+            batched_output_token_ids.append(output_token_ids)
+            batched_logprobs.append(log_probs)
+
     pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
     batched_output_token_ids = pad_sequence(batched_output_token_ids, batch_first=True, padding_value=pad_token_id)
     if len(batched_logprobs) > 0:
         batched_logprobs = pad_sequence(batched_logprobs, batch_first=True, padding_value=pad_token_id)
     return batched_output_token_ids, batched_logprobs
 
-
 def get_tool_call_parser_type(
     processing_class: PreTrainedTokenizer | PreTrainedTokenizerFast | ProcessorMixin,
 ) -> str:
@@ -262,6 +286,7 @@ def __init__(
         port=None,
         trust_remote_code: bool = False,
         device_mesh: DeviceMesh | None = None,
+        sharding_manager=None,
         **kwargs,
     ):
         """Synchronized SGLang rollout engine.
@@ -289,6 +314,8 @@ def __init__(
         super().__init__()
         self.config = config
         self._device_mesh_cpu = device_mesh
+        self.sharding_manager = sharding_manager
+        
         os.environ.setdefault("SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK", "true")
 
         (
@@ -326,6 +353,9 @@ def __init__(
                 self.pad_token_id = self.processing_class.tokenizer.pad_token_id
             except AttributeError as e:
                 raise ValueError(f"Cannot get pad_token_id from processing_class {self.processing_class}") from e
+        
+        self.param_update_manager = kwargs.get('param_update_manager', None)
+
 
     def _init_distributed_env(self, device_mesh_cpu, **kwargs):
         self._device_mesh_cpu = device_mesh_cpu
@@ -446,40 +476,79 @@ def _init_inference_engine(self, trust_remote_code, actor_module, port):
         if first_rank_in_node:
             rank = dist.get_rank()
             os.environ["SGLANG_BLOCK_NONZERO_RANK_CHILDREN"] = "0"
-            self._engine = AsyncEngine(
-                model_path=actor_module,
-                dtype=self.config.dtype,
-                mem_fraction_static=self.config.gpu_memory_utilization,
-                enable_memory_saver=True,
-                base_gpu_id=0,
-                gpu_id_step=1,
-                tp_size=self._tp_size,
-                node_rank=node_rank,
-                load_format=load_format,
-                dist_init_addr=dist_init_addr,
-                nnodes=nnodes,
-                trust_remote_code=trust_remote_code,
-                # NOTE(linjunrong): add rank to prevent SGLang generate same port inside PortArgs.init_new
-                # when random.seed is being set during training
-                port=30000 + rank,
-                # NOTE(Chenyang): turn on log_level to see the decoding speed of SGLang Engine
-                # log_level="INFO"
-                # NOTE(Chenyang): turn the following lines to see the input and output of each request
-                # log_requests=True,
-                # log_requests_level=2,
-                # NOTE(Chenyang): turn on max_running_requests to set the max concurrent running requests
-                # max_running_requests=1,
-                mm_attention_backend="fa3",
-                attention_backend=attention_backend if attention_backend is not None else "fa3",
-                # In async mode for AgentLoop, SGLang support token in token out to avoid the tokenizer
-                # inconsistency issue.
-                skip_tokenizer_init=self.config.mode == "async",
-                **engine_kwargs,
-            )
+
+            enable_dual_buffer = getattr(self.config, 'enable_dual_buffer', False)
+            if enable_dual_buffer:
+                print(f"[SGLangRollout] Initializing DualBufferAsyncEngine for dual buffer optimization")
+                from .dual_buffer_engine import DualBufferAsyncEngine
+                buffer_bucket_size_mb = getattr(self.config, 'param_update_consume_bucket_size_mb', 128)
+                self._engine = DualBufferAsyncEngine(
+                    model_path=actor_module,
+                    dtype=self.config.dtype,
+                    mem_fraction_static=self.config.gpu_memory_utilization,
+                    enable_memory_saver=True,
+                    base_gpu_id=0,
+                    gpu_id_step=1,
+                    tp_size=self._tp_size,
+                    node_rank=node_rank,
+                    load_format=load_format,
+                    dist_init_addr=dist_init_addr,
+                    nnodes=nnodes,
+                    trust_remote_code=trust_remote_code,
+                    # NOTE(linjunrong): add rank to prevent SGLang generate same port inside PortArgs.init_new
+                    # when random.seed is being set during training
+                    port=30000 + rank,
+                    # NOTE(Chenyang): turn on log_level to see the decoding speed of SGLang Engine
+                    # log_level="INFO"
+                    # NOTE(Chenyang): turn the following lines to see the input and output of each request
+                    # log_requests=True,
+                    # log_requests_level=2,
+                    # NOTE(Chenyang): turn on max_running_requests to set the max concurrent running requests
+                    # max_running_requests=1,
+                    mm_attention_backend="fa3",
+                    attention_backend=attention_backend if attention_backend is not None else "fa3",
+                    # In async mode for AgentLoop, SGLang support token in token out to avoid the tokenizer
+                    # inconsistency issue.
+                    skip_tokenizer_init=self.config.mode == "async",
+                    bucket_size_mb=buffer_bucket_size_mb,
+                    **engine_kwargs,
+                )
+                print(f"[SGLangRollout] DualBufferAsyncEngine initialized successfully")
+            else:
+                print(f"[SGLangRollout] Initializing standard AsyncEngine")
+                self._engine = AsyncEngine(
+                    model_path=actor_module,
+                    dtype=self.config.dtype,
+                    mem_fraction_static=self.config.gpu_memory_utilization,
+                    enable_memory_saver=True,
+                    base_gpu_id=0,
+                    gpu_id_step=1,
+                    tp_size=self._tp_size,
+                    node_rank=node_rank,
+                    load_format=load_format,
+                    dist_init_addr=dist_init_addr,
+                    nnodes=nnodes,
+                    trust_remote_code=trust_remote_code,
+                    # NOTE(linjunrong): add rank to prevent SGLang generate same port inside PortArgs.init_new
+                    # when random.seed is being set during training
+                    port=30000 + rank,
+                    # NOTE(Chenyang): turn on log_level to see the decoding speed of SGLang Engine
+                    # log_level="INFO"
+                    # NOTE(Chenyang): turn the following lines to see the input and output of each request
+                    # log_requests=True,
+                    # log_requests_level=2,
+                    # NOTE(Chenyang): turn on max_running_requests to set the max concurrent running requests
+                    # max_running_requests=1,
+                    mm_attention_backend="fa3",
+                    attention_backend=attention_backend if attention_backend is not None else "fa3",
+                    # In async mode for AgentLoop, SGLang support token in token out to avoid the tokenizer
+                    # inconsistency issue.
+                    skip_tokenizer_init=self.config.mode == "async",
+                    **engine_kwargs,
+                )
         else:
             self._engine = None
 
-        self.sharding_manager = None
         self.is_sleep = True
 
     def _init_sampling_params(self, **kwargs):
@@ -560,6 +629,18 @@ def _initialize_interactions(self, config):
         logger.info(f"Initialize interactions from configuration: interaction_map: {list(interaction_map.keys())}")
         return interaction_map
 
+    def get_update_weight_func(self):
+        """Return the engine's ``update_buffer_data_only`` callable.
+
+        Returns ``None`` when this object has no ``_engine`` attribute or the
+        engine is ``None``.
+        """
+        engine = getattr(self, '_engine', None)
+        if engine is None:
+            return None
+        return engine.update_buffer_data_only
+
+    def set_params_meta(self, params_meta):
+        """Hand ``params_meta`` to the engine; do nothing when there is no engine."""
+        engine = getattr(self, '_engine', None)
+        if engine is None:
+            return
+        engine.set_params_meta(params_meta)
+
+    def update_weight_from_dual_buffer(self):
+        """Ask the engine to switch to the latest buffered weights.
+
+        Passes ``self.sharding_manager.update_weights`` to the engine's
+        ``execute_update_weights_before_generate``.
+
+        Returns:
+            The engine call's result, so callers can detect a failed switch.
+            A falsy result is also reported on stdout, because generation
+            would otherwise continue silently on the previous weights.
+        """
+        func_call = self.sharding_manager.update_weights
+        update_success = self._engine.execute_update_weights_before_generate(func_call)
+        if not update_success:
+            print("[SGLangRollout] WARNING: dual-buffer weight update did not succeed")
+        return update_success
+
     @GPUMemoryLogger(role="sglang rollout", logger=logger)
     @torch.no_grad()
     def generate_sequences(self, prompts: DataProto, **kwargs) -> DataProto:
@@ -583,10 +664,27 @@ def generate_sequences(self, prompts: DataProto, **kwargs) -> DataProto:
             responses:     |<- LLM generation ->|<- tool_calls ->|<- LLM generation ->|<- padding ->|
             response_mask: | 1, 1, 1, ..., 1, 1 | 0, 0, .., 0, 0 | 1, 1, 1, ..., 1, 1 | 0, 0, ..., 0|
         """
+        
+        is_dual_buffer = hasattr(self, '_engine') and self._engine is not None
+        enable_dual_buffer = getattr(self.config, 'enable_dual_buffer', False)
+        if enable_dual_buffer and is_dual_buffer:
+            self._wait_for_param_update_completion()
+            self.update_weight_from_dual_buffer()
+
         if self.config.multi_turn.enable:
             return self._req_level_generate_sequences(prompts, **kwargs)
         return self._batch_level_generate_sequences(prompts, **kwargs)
 
+    def _wait_for_param_update_completion(self, timeout_seconds=150):
+        """Block until the engine reports that the pending buffer write has finished.
+
+        Nothing is waited for when ``self.param_update_manager`` has no
+        ``_param_update_start_time`` attribute, or when there is no engine.
+
+        Args:
+            timeout_seconds: Time budget measured from
+                ``_param_update_start_time``. The wait itself is not
+                interrupted; a warning is printed afterwards when the budget
+                was exceeded.
+        """
+        if not hasattr(self.param_update_manager, '_param_update_start_time'):
+            print("[SGLangRollout] No param_update started, skipping wait")
+            return
+
+        engine = getattr(self, '_engine', None)
+        if engine is None:
+            return
+
+        start_time = self.param_update_manager._param_update_start_time
+        engine.wait_for_buffer_write()
+
+        if start_time is None:
+            # No start timestamp recorded, so the budget cannot be checked
+            return
+        elapsed = time.time() - start_time
+        if elapsed > timeout_seconds:
+            print(f"[SGLangRollout] WARNING: param update took {elapsed:.2f} s, exceeding the {timeout_seconds} s budget")
+
     @GPUMemoryLogger(role="sglang rollout", logger=logger)
     @torch.no_grad()
     def _batch_level_generate_sequences(self, prompts: DataProto, **kwargs) -> DataProto:
@@ -759,6 +857,7 @@ def _batch_level_generate_sequences(self, prompts: DataProto, **kwargs) -> DataP
         seq = torch.cat([idx, response], dim=-1)
 
         response_length = response.size(1)
+        position_ids = position_ids.to("cpu", non_blocking=True)
         delta_position_id = torch.arange(1, response_length + 1, device=position_ids.device)
         delta_position_id = delta_position_id.unsqueeze(0).repeat(batch_size, 1)
         if position_ids.dim() == 3:  # qwen2vl mrope
@@ -791,9 +890,30 @@ def _batch_level_generate_sequences(self, prompts: DataProto, **kwargs) -> DataP
             batch["rollout_log_probs"] = rollout_log_probs
 
         # free cache engine
-        if self._engine is not None and self._tp_rank == 0:
-            loop = asyncio.get_event_loop()
-            loop.run_until_complete(self._engine.flush_cache())
+        if self.config.free_cache_engine and self._engine is not None and self._tp_rank == 0:
+            import threading
+            import concurrent.futures
+            current_thread = threading.current_thread()
+            is_main_thread = current_thread.name == 'MainThread'
+            
+            if is_main_thread:
+                loop = asyncio.get_event_loop()
+                loop.run_until_complete(self._engine.flush_cache())
+            else:
+                def run_async_flush_cache_in_new_thread():
+                    loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(loop)
+                    try:
+                        return loop.run_until_complete(self._engine.flush_cache())
+                    finally:
+                        loop.close()
+                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+                    future = executor.submit(run_async_flush_cache_in_new_thread)
+                    try:
+                        future.result()
+                    except Exception as e:
+                        print(f"Thread pool async flush_cache failed: {e}")
+                        pass
 
         return DataProto(batch=batch, non_tensor_batch=non_tensor_batch)
 
@@ -1588,3 +1708,11 @@ async def sleep(self):
             return
         await self.sharding_manager.sleep()
         self.is_sleep = True
+
+    def sync_per_tensor_generator(self):
+        """Delegate to ``param_update_manager.sync_per_tensor_generator``.
+
+        Returns:
+            The manager's result, or ``None`` when there is no manager or the
+            manager does not provide ``sync_per_tensor_generator``.
+        """
+        manager = getattr(self, 'param_update_manager', None)
+        if manager is None or not hasattr(manager, 'sync_per_tensor_generator'):
+            return None
+        outcome = manager.sync_per_tensor_generator()
+        print(f"[SGLangRollout] param_update_manager.sync_per_tensor_generator completed")
+        return outcome
+
diff --git a/verl/workers/sharding_manager/megatron_sglang.py b/verl/workers/sharding_manager/megatron_sglang.py
index 65ca32a5ce0..c8efa97128b 100644
--- a/verl/workers/sharding_manager/megatron_sglang.py
+++ b/verl/workers/sharding_manager/megatron_sglang.py
@@ -20,10 +20,14 @@
 import asyncio
 import logging
 import os
+import time
 
 from omegaconf import DictConfig
 from sglang.srt.entrypoints.engine import Engine
-from sglang.srt.weight_sync.utils import update_weights as sgl_update_weights
+try:
+    from sglang.srt.weight_sync.utils import update_weights as sgl_update_weights
+except Exception:
+    sgl_update_weights = None
 from torch import nn
 from torch.distributed.device_mesh import DeviceMesh
 
@@ -145,17 +149,41 @@ async def update_weights(self, params):
             await self.inference_engine.resume_memory_occupation()
         named_tensors = params
 
-        update_weights_bucket_bytes = int(self.rollout_config.update_weights_bucket_megabytes) << 20
-        for params_batch in get_named_tensor_buckets(named_tensors, update_weights_bucket_bytes):
-            await sgl_update_weights(
-                engine=self.inference_engine,
-                params_batch=params_batch,
-                device_mesh_key="tp",
-                device_mesh=self.device_mesh,
-            )
+        if sgl_update_weights is not None:
+            update_weights_bucket_bytes = int(self.rollout_config.update_weights_bucket_megabytes) << 20
+            for params_batch in get_named_tensor_buckets(named_tensors, update_weights_bucket_bytes):
+                await sgl_update_weights(
+                    engine=self.inference_engine,
+                    params_batch=params_batch,
+                    device_mesh_key="tp",
+                    device_mesh=self.device_mesh,
+                )
+            
+            if self.device_mesh["tp"].get_local_rank() == 0:
+                await self.inference_engine.flush_cache()
+
+        else:
+            # Most naive implementation, can optimize a lot if it is bottleneck from sglang Engine weight update
+            # named_tensors = [(k, v) for k, v in params.items()]
+            named_tensors = params
+            load_format = None
+
+            for tensor_index, (name, tensor) in enumerate(named_tensors):
+                if self.device_mesh["tp"].get_local_rank() == 0:
+                    await self.inference_engine.update_weights_from_tensor_legacy(
+                        named_tensors=[
+                            (
+                                name,
+                                tensor.detach(),
+                            )
+                        ],
+                        load_format=load_format,
+                        flush_cache=False,
+                    )
+
+                if self.device_mesh["tp"].get_local_rank() == 0:
+                    await self.inference_engine.flush_cache()
 
-        if self.device_mesh["tp"].get_local_rank() == 0:
-            await self.inference_engine.flush_cache()
 
     async def release_memory(self):
         if self.device_mesh["tp"].get_local_rank() == 0 and self.rollout_config.free_cache_engine:
@@ -215,3 +243,191 @@ def postprocess_data(self, data: DataProto) -> DataProto:
         if self.infer_tp_size == 1:
             return data
         return data.chunk(chunks=self.infer_tp_size)[self.device_mesh["tp"].get_local_rank()]
+
+class MegatronSGLangAsyncShardingManager(MegatronSGLangShardingManager):
+    """
+    This class is used to handle the async inference in Megatron SGLang.
+    It inherits from MegatronSGLangShardingManager and overrides the wake_up and sleep methods.
+    """
+    def __init__(
+        self,
+        actor_module: nn.ModuleList,
+        inference_engine: Engine,
+        model_config: DictConfig,
+        rollout_config: DictConfig,
+        transformer_config,
+        layer_name_mapping,
+        weight_converter,
+        device_mesh: DeviceMesh | None = None,
+        offload_param: bool = False,
+        bridge=None,
+    ):
+        """Store the collaborators, derive the inference TP size and prepare the generation RNG state."""
+        self.actor_module = actor_module
+        self.inference_engine = inference_engine
+        self.rollout_config = rollout_config
+        self.model_config = model_config
+        self.transformer_config = transformer_config
+        self.layer_name_mapping = layer_name_mapping
+        self.weight_converter = weight_converter
+        self.device_mesh = device_mesh
+        self.offload_param = offload_param  # was accepted but never stored; keep it reachable
+        self.bridge = bridge
+
+        if self.device_mesh is not None:
+            self.infer_tp_size = self.device_mesh["tp"].mesh.size()[0]
+        else:
+            self.infer_tp_size = self.inference_engine._tp_size
+        # Note that torch_random_states may be different on each dp rank
+        self.torch_random_states = get_torch_device().get_rng_state()
+        self.gen_random_states = None
+        if self.device_mesh is not None:
+            gen_dp_rank = self.device_mesh["dp"].get_local_rank()
+            get_torch_device().manual_seed(gen_dp_rank + 1000)  # make sure all tp ranks have the same random states
+            self.gen_random_states = get_torch_device().get_rng_state()
+            get_torch_device().set_rng_state(self.torch_random_states)
+
+        self.dual_buffer_engine = None
+        if hasattr(inference_engine, 'update_buffer_data_only'):
+            self.dual_buffer_engine = inference_engine
+            print(f"[MegatronSGLangAsyncShardingManager] Using dual_buffer_engine: {type(inference_engine)}")
+
+    def set_model_parameters(self, actor_module: nn.ModuleList) -> None:
+        """
+        Replace the actor module reference held by this sharding manager.
+        Only rebinds ``self.actor_module``; nothing is sent to the inference engine here.
+        """
+        self.actor_module = actor_module
+
+    def update_model_params(self, actor_module):
+        """Adopt ``actor_module`` and synchronously push its per-tensor weights to the inference engine."""
+        self.set_model_parameters(actor_module)
+        weight_stream = per_tensor_generator(
+            self.actor_module,
+            self.model_config,
+            self.weight_converter,
+            self.transformer_config,
+            self.layer_name_mapping,
+        )
+        # Block until the async upload has finished on the current event loop.
+        asyncio.get_event_loop().run_until_complete(self.update_weights(weight_stream))
+
+
+    @GPUMemoryLogger(role="MegatronSGLangAsyncShardingManager enter", logger=logger)
+    def __enter__(self):
+        """Reset ``self.timing`` and run ``wake_up`` to completion inside the "reshard" timer."""
+        self.timing = {}
+        with simple_timer("reshard", self.timing):
+            asyncio.get_event_loop().run_until_complete(self.wake_up())
+
+    @GPUMemoryLogger(role="MegatronSGLangAsyncShardingManager exit", logger=logger)
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Run ``sleep`` to completion on the current event loop when the ``with`` block exits."""
+        asyncio.get_event_loop().run_until_complete(self.sleep())
+
+    def update_weights_sync(self, params):
+        """
+        Fully synchronous version of update_weights, avoid using async calls.
+
+        ``params`` is an iterable of ``(name, tensor)`` pairs. It is iterated on every rank, but only
+        TP-local rank 0 calls the engine, and the engine cache is flushed once after the last tensor.
+        A missing engine hook is reported once with a printed warning instead of once per tensor.
+        """
+        is_tp_rank0 = self.device_mesh["tp"].get_local_rank() == 0
+        # Look the optional sync hooks up once instead of once per tensor.
+        push_weights = getattr(self.inference_engine, 'update_weights_from_tensor_sync', None)
+        flush_engine_cache = getattr(self.inference_engine, 'flush_cache_sync', None)
+        if is_tp_rank0 and push_weights is None:
+            print("Warning: inference_engine has no update_weights_from_tensor_sync method")
+
+        for name, tensor in params:
+            # NOTE(review): non-zero TP ranks still consume `params`; presumably producing the tensors
+            # involves collective communication, so the loop must not be skipped there - confirm.
+            if is_tp_rank0 and push_weights is not None:
+                push_weights(named_tensors=[(name, tensor.detach())], load_format=None, flush_cache=False)
+
+        # Every push uses flush_cache=False, so a single flush at the end is sufficient.
+        if not is_tp_rank0:
+            return
+        if flush_engine_cache is not None:
+            flush_engine_cache()
+        else:
+            print("Warning: inference_engine has no flush_cache_sync method")
+
+    async def update_weights(self, params, use_reqinput=False):
+        """
+        Push new weights into the inference engine from TP-local rank 0, then flush its cache once.
+
+        Args:
+            params: iterable consumed on every rank; request-input objects when ``use_reqinput``
+                is true, otherwise ``(name, tensor)`` pairs.
+            use_reqinput: upload each item with ``update_weights_from_reqinput`` instead of
+                ``update_weights_from_tensor_legacy``.
+
+        Returns:
+            Always ``True``.
+        """
+        # NOTE: this method does not call resume_memory_occupation() on the engine.
+        is_tp_rank0 = self.device_mesh["tp"].get_local_rank() == 0
+        for item in params:
+            # NOTE(review): `params` is drained on all ranks even though only rank 0 uploads;
+            # presumably producing each item involves collectives - confirm before skipping.
+            if not is_tp_rank0:
+                continue
+            if use_reqinput:
+                await self.inference_engine.update_weights_from_reqinput(item)
+            else:
+                name, tensor = item
+                await self.inference_engine.update_weights_from_tensor_legacy(
+                    named_tensors=[(name, tensor.detach())], load_format=None, flush_cache=False
+                )
+
+        # Uploads are sent with flush_cache=False, so flush once at the end instead of per item.
+        if is_tp_rank0:
+            await self.inference_engine.flush_cache()
+        return True
+
+    async def release_memory(self):
+        """Release the engine's memory occupation; only on TP-local rank 0 and if `free_cache_engine` is set."""
+        if self.device_mesh["tp"].get_local_rank() == 0 and self.rollout_config.free_cache_engine:
+            await self.inference_engine.release_memory_occupation()
+
+    @GPUMemoryLogger(role="MegatronSGLangAsyncShardingManager enter", logger=logger)
+    async def wake_up(self):
+        """
+        Prepare this rank for generation.
+
+        This override does not export or upload weights and does not load or offload the actor
+        parameters; weights can be pushed to the engine with ``update_model_params``.
+        It only:
+
+        1. empties the device cache via ``get_torch_device().empty_cache()``;
+        2. if a device mesh is set, saves the current RNG state to ``self.torch_random_states``
+           and installs ``self.gen_random_states``.
+        """
+        get_torch_device().empty_cache()
+
+        if self.device_mesh is None:
+            return
+
+        # important: need to manually set the random states of each tp to be identical.
+        current_state = get_torch_device().get_rng_state()
+        self.torch_random_states = current_state
+        get_torch_device().set_rng_state(self.gen_random_states)
+
+    @GPUMemoryLogger(role="MegatronSGLangAsyncShardingManager exit", logger=logger)
+    async def sleep(self):
+        """
+        Leave generation mode.
+
+        This override neither releases the engine's memory nor switches the actor modules to
+        train mode. It empties the device cache and, if a device mesh is set, saves the current
+        RNG state to ``self.gen_random_states`` and restores ``self.torch_random_states``.
+        """
+        # add empty cache after each compute
+        get_torch_device().empty_cache()
+        if self.device_mesh is None:
+            return
+        # restore random states
+        self.gen_random_states = get_torch_device().get_rng_state()
+        get_torch_device().set_rng_state(self.torch_random_states)
diff --git a/verl/workers/sharding_manager/megatron_vllm.py b/verl/workers/sharding_manager/megatron_vllm.py
index a6ddb065c67..82c88e7f2b7 100644
--- a/verl/workers/sharding_manager/megatron_vllm.py
+++ b/verl/workers/sharding_manager/megatron_vllm.py
@@ -18,6 +18,7 @@
 import inspect
 import logging
 import os
+import time
 
 import torch
 import torch.distributed