From 6d24aa581c2b598210fdaa2abdab425f3c561336 Mon Sep 17 00:00:00 2001 From: Yingqi Cao <78573313+ioeddk@users.noreply.github.com> Date: Tue, 12 Nov 2024 13:47:43 -0800 Subject: [PATCH] added mmdetection3d and openpcdet integrations. (#334) * added mmdetection3d and openpcdet integrations. * Delete examples/mmdetection3d/converted_models/SECOND/second_hv_secfpn_8xb6-80e_kitti-3d-car-75d9305e.pth Deleted accidentally committed checkpoint * Delete examples/mmdetection3d/tools/model_converter.py Delete redundant empty converter placeholder * Update README.md Addedd torchsparse integration in the news section * Update README.md * Update README.md --- README.md | 1 + examples/README.md | 36 + examples/converter.py | 240 ++++++ examples/mmdetection3d/README.md | 79 ++ examples/mmdetection3d/configs/README.md | 1 + .../mmdetection3d/converted_models/README.md | 1 + examples/mmdetection3d/demo.ipynb | 512 +++++++++++++ .../scripts/run_evaluation/SECOND.sh | 3 + examples/mmdetection3d/setup.py | 7 + examples/mmdetection3d/ts_plugin/__init__.py | 7 + .../ts_plugin/models/__init__.py | 4 + .../ts_plugin/models/backbones/__init__.py | 1 + .../ts_plugin/models/backbones/resnet.py | 679 +++++++++++++++++ .../ts_plugin/models/layers/__init__.py | 1 + .../ts_plugin/models/layers/sparse_block.py | 137 ++++ .../models/middle_encoders/__init__.py | 3 + .../models/middle_encoders/sparse_encoder.py | 248 ++++++ .../models/middle_encoders/sparse_unet.py | 335 ++++++++ .../middle_encoders/voxel_set_abstraction.py | 334 ++++++++ .../models/roi_heads/bbox_heads/__init__.py | 1 + .../roi_heads/bbox_heads/parta2_bbox_head.py | 717 ++++++++++++++++++ examples/openpcdet/README.md | 66 ++ .../kitti_models/PartA2_plugin.yaml | 191 +++++ .../kitti_models/pv_rcnn_plugin.yaml | 249 ++++++ .../kitti_models/second_plugin.yaml | 121 +++ .../kitti_models/voxel_rcnn_car_plugin.yaml | 202 +++++ .../cbgs_voxel0075_voxelnext.yaml | 156 ++++ examples/openpcdet/converted_models/README.md | 1 + examples/openpcdet/converter_voxelnext.py | 241 ++++++ examples/openpcdet/demo.ipynb | 650 ++++++++++++++++ examples/openpcdet/pcdet_plugin/__init__.py | 33 + .../openpcdet/pcdet_plugin/models/__init__.py | 6 + .../models/backbones_2d/__init__.py | 1 + .../backbones_2d/map_to_bev/__init__.py | 5 + .../map_to_bev/height_compression.py | 28 + .../models/backbones_3d/__init__.py | 4 + .../models/backbones_3d/backbone3d.py | 151 ++++ .../backbones_3d/backbone_voxel_next.py | 279 +++++++ .../pcdet_plugin/models/backbones_3d/pfe.py | 413 ++++++++++ .../pcdet_plugin/models/backbones_3d/unet.py | 222 ++++++ .../models/dense_heads/__init__.py | 1 + .../models/dense_heads/voxel_next_head.py | 574 ++++++++++++++ .../pcdet_plugin/models/detectors/__init__.py | 1 + .../models/detectors/detector3d_template.py | 401 ++++++++++ .../pcdet_plugin/models/roi_heads/__init__.py | 1 + .../models/roi_heads/partA2_head.py | 252 ++++++ examples/openpcdet/setup.py | 45 ++ torchsparse/tensor.py | 32 + 48 files changed, 7673 insertions(+) create mode 100644 examples/README.md create mode 100644 examples/converter.py create mode 100644 examples/mmdetection3d/README.md create mode 100644 examples/mmdetection3d/configs/README.md create mode 100644 examples/mmdetection3d/converted_models/README.md create mode 100644 examples/mmdetection3d/demo.ipynb create mode 100755 examples/mmdetection3d/scripts/run_evaluation/SECOND.sh create mode 100644 examples/mmdetection3d/setup.py create mode 100644 examples/mmdetection3d/ts_plugin/__init__.py create mode 100644 
examples/mmdetection3d/ts_plugin/models/__init__.py create mode 100644 examples/mmdetection3d/ts_plugin/models/backbones/__init__.py create mode 100644 examples/mmdetection3d/ts_plugin/models/backbones/resnet.py create mode 100644 examples/mmdetection3d/ts_plugin/models/layers/__init__.py create mode 100644 examples/mmdetection3d/ts_plugin/models/layers/sparse_block.py create mode 100644 examples/mmdetection3d/ts_plugin/models/middle_encoders/__init__.py create mode 100644 examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_encoder.py create mode 100644 examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_unet.py create mode 100644 examples/mmdetection3d/ts_plugin/models/middle_encoders/voxel_set_abstraction.py create mode 100644 examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/__init__.py create mode 100644 examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/parta2_bbox_head.py create mode 100644 examples/openpcdet/README.md create mode 100644 examples/openpcdet/cfgs_templates/kitti_models/PartA2_plugin.yaml create mode 100644 examples/openpcdet/cfgs_templates/kitti_models/pv_rcnn_plugin.yaml create mode 100644 examples/openpcdet/cfgs_templates/kitti_models/second_plugin.yaml create mode 100644 examples/openpcdet/cfgs_templates/kitti_models/voxel_rcnn_car_plugin.yaml create mode 100644 examples/openpcdet/cfgs_templates/nuscenes_models/cbgs_voxel0075_voxelnext.yaml create mode 100644 examples/openpcdet/converted_models/README.md create mode 100644 examples/openpcdet/converter_voxelnext.py create mode 100644 examples/openpcdet/demo.ipynb create mode 100644 examples/openpcdet/pcdet_plugin/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_2d/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/height_compression.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_3d/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone3d.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone_voxel_next.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_3d/pfe.py create mode 100644 examples/openpcdet/pcdet_plugin/models/backbones_3d/unet.py create mode 100644 examples/openpcdet/pcdet_plugin/models/dense_heads/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/dense_heads/voxel_next_head.py create mode 100644 examples/openpcdet/pcdet_plugin/models/detectors/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/detectors/detector3d_template.py create mode 100644 examples/openpcdet/pcdet_plugin/models/roi_heads/__init__.py create mode 100644 examples/openpcdet/pcdet_plugin/models/roi_heads/partA2_head.py create mode 100644 examples/openpcdet/setup.py diff --git a/README.md b/README.md index 50c9707..b1b8a45 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ TorchSparse is a high-performance neural network library for point cloud process Point cloud computation has become an increasingly more important workload for autonomous driving and other applications. Unlike dense 2D computation, point cloud convolution has **sparse** and **irregular** computation patterns and thus requires dedicated inference system support with specialized high-performance kernels. 
While existing point cloud deep learning libraries have developed different dataflows for convolution on point clouds, they assume a single dataflow throughout the execution of the entire model. In this work, we systematically analyze and improve existing dataflows. Our resulting system, TorchSparse, achieves **2.9x**, **3.3x**, **2.2x** and **1.7x** measured end-to-end speedup on an NVIDIA A100 GPU over the state-of-the-art MinkowskiEngine, SpConv 1.2, TorchSparse (MLSys) and SpConv v2 in inference respectively.

## News

+**\[2024/11\]** TorchSparse++ now supports [MMDetection3D](https://github.com/open-mmlab/mmdetection3d) and [OpenPCDet](https://github.com/open-mmlab/OpenPCDet) via plugins! [A full demo](./examples/) is available.

**\[2023/11\]** TorchSparse++ has been adopted by [One-2-3-45++](https://arxiv.org/abs/2311.07885) from Prof. Hao Su's lab (UCSD) for 3D object generation!

diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..499373a --- /dev/null +++ b/examples/README.md @@ -0,0 +1,36 @@
+# Containers
+A Docker image with the full environment pre-installed is available: `ioeddk/torchsparse_plugin_demo:latest`. It includes MMDetection3D, OpenPCDet, TorchSparse, the plugins, and PyTorch, built on the NVIDIA CUDA 12.1 image.
+The datasets are not included in the image and need to be bind-mounted into the container at startup, for example with the following command:
+```bash
+docker run -it --gpus all --mount type=bind,source=<path/to/kitti>,target=/root/data/kitti --mount type=bind,source=<path/to/nuscenes>,target=/root/data/nuscenes ioeddk/torchsparse_plugin_demo:latest
+```
+The command above mounts the KITTI and nuScenes dataset directories when starting the container.
+
+Using this container is the simplest way to run the plugin demos, since all the dependencies are installed and the paths are configured. You can simply open `/root/repo/torchsparse-dev/examples/mmdetection3d/demo.ipynb` or `/root/repo/torchsparse-dev/examples/openpcdet/demo.ipynb` and run all cells. The helper functions in the demos automatically load the pretrained checkpoints, perform the conversions, and run the evaluation.
+
+If you are not using the container, please follow the tutorial below to run the demo. The same steps are also included in the demo notebooks.
+
+# Convert the Module Weights
+The kernel layout of TorchSparse differs from SpConv's, so the parameters must be converted before the TorchSparse backend can be used. The conversion script is `examples/converter.py`. The `convert_weights` function has the signature `def convert_weights(ckpt_before: str, ckpt_after: str, cfg_path: str, v_spconv: int = 1, framework: str = "mmdet3d")`:
+- `ckpt_before`: the pretrained checkpoint of your model, typically downloaded from the MMDetection3D or OpenPCDet model zoo.
+- `ckpt_after`: the output path for the converted checkpoint.
+- `cfg_path`: the path to the config file of the MMDetection3D or OpenPCDet model to be converted. It is required because the converter creates an instance of the model, finds all the sparse convolution layers, and converts the weights of those layers.
+- `v_spconv`: the version of SpConv that the original model was built with. Valid versions are 1 and 2.
+- `framework`: choose between `mmdet3d` and `openpc`.
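+
+If you prefer to call the converter from Python rather than the command line, a minimal sketch looks like the following. It assumes the snippet is run from the `examples/` directory so that `converter.py` is importable, and the checkpoint/config paths are placeholders for your own files:
+```python
+# Minimal sketch: convert an SpConv checkpoint via the Python API of examples/converter.py.
+# The paths below are placeholders; point them at your own checkpoint and config.
+from converter import convert_weights
+
+convert_weights(
+    ckpt_before="/path/to/second_spconv.pth",      # pretrained SpConv checkpoint
+    ckpt_after="/path/to/second_torchsparse.pth",  # where the converted checkpoint is written
+    cfg_path="/path/to/second_config.py",          # model config (.py for mmdet3d, .yaml for openpc)
+    v_spconv=2,           # SpConv version the checkpoint was built with (1 or 2)
+    framework="mmdet3d",  # or "openpc" for OpenPCDet checkpoints
+)
+```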
+
+## Example Conversion Commands
+### MMDetection3D
+```bash
+python examples/converter.py --ckpt_before ../mmdetection3d/models/PV-RCNN/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth --cfg_path ../mmdetection3d/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py --ckpt_after ./converted/PV-RCNN/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth --v_spconv 1 --framework mmdet3d
+```
+
+### OpenPCDet
+```bash
+python examples/converter.py --ckpt_before ../OpenPCDet/models/SECOND/second_7862.pth --cfg_path ../OpenPCDet/tools/cfgs/kitti_models/second.yaml --ckpt_after ./converted/SECOND/second_7862.pth --v_spconv 1 --framework openpc
+```
+
+# Run Evaluation
+Use the `test.py` that comes with MMDetection3D or OpenPCDet to run the evaluation, and provide the converted checkpoint as the model weights. For MMDetection3D models, you need to pass extra arguments that replace certain layers with their TorchSparse counterparts (see how to replace them in `examples/mmdetection3d/demo.ipynb`). For OpenPCDet, config templates with those layers already replaced are provided in `examples/openpcdet/cfgs_templates`; to use them, see `examples/openpcdet/demo.ipynb`. An additional step is to add `import ts_plugin` in `mmdetection3d/tools/test.py` and `import pcdet_plugin` in `OpenPCDet/tools/test.py` to activate the plugins before running the evaluation.
+
+# Details
+Please see `examples/mmdetection3d/demo.ipynb` and `examples/openpcdet/demo.ipynb` for more details.
diff --git a/examples/converter.py b/examples/converter.py new file mode 100644 index 0000000..6301ceb --- /dev/null +++ b/examples/converter.py @@ -0,0 +1,240 @@
+"""Model converter that converts an SpConv checkpoint to a TorchSparse one."""
+import argparse
+import torch
+import re
+import logging
+import spconv.pytorch as spconv
+
+# Disable JIT because running OpenPCDet with JIT enabled will cause import issues.
+torch.jit._state.disable()
+
+# Works for SECOND
+def convert_weights_v2(key, model):
+    """Convert model weights for models built with SpConv v2.
+ + :param key: _description_ + :type key: _type_ + :param model: _description_ + :type model: _type_ + :return: _description_ + :rtype: _type_ + """ + new_key = key.replace(".weight", ".kernel") + weights = model[key] + oc, kx, ky, kz, ic = weights.shape + + converted_weights = weights.reshape(oc, -1, ic) + + converted_weights = converted_weights.permute(1, 0, 2) + + if converted_weights.shape[0] == 1: + converted_weights = converted_weights[0] + elif converted_weights.shape[0] == 27: + offsets = [list(range(kz)), list(range(ky)), list(range(kx))] + kykx = ky * kx + offsets = [ + (x * kykx + y * kx + z) + for z in offsets[0] + for y in offsets[1] + for x in offsets[2] + ] + offsets = torch.tensor( + offsets, dtype=torch.int64, device=converted_weights.device + ) + converted_weights = converted_weights[offsets] + + converted_weights = converted_weights.permute(0,2,1) + + return new_key, converted_weights + +# Order for CenterPoint, PV-RCNN, and default, legacy SpConv +def convert_weights_v1(key, model): + """Convert model weights for models implemented with SpConv v1 + + :param key: _description_ + :type key: _type_ + :param model: _description_ + :type model: _type_ + :return: _description_ + :rtype: _type_ + """ + new_key = key.replace(".weight", ".kernel") + weights = model[key] + + kx, ky, kz, ic, oc = weights.shape + + converted_weights = weights.reshape(-1, ic, oc) + if converted_weights.shape[0] == 1: + converted_weights = converted_weights[0] + + elif converted_weights.shape[0] == 27: + offsets = [list(range(kz)), list(range(ky)), list(range(kx))] + kykx = ky * kx + offsets = [ + (x * kykx + y * kx + z) + for z in offsets[0] + for y in offsets[1] + for x in offsets[2] + ] + offsets = torch.tensor( + offsets, dtype=torch.int64, device=converted_weights.device + ) + converted_weights = converted_weights[offsets] + elif converted_weights.shape[0] == 3: # 3 is the case in PartA2. + pass + # offsets = torch.tensor( + # [2, 1, 0], dtype=torch.int64, device=converted_weights.device + # ) + # converted_weights = converted_weights[offsets] + return new_key, converted_weights + +def build_mmdet_model_from_cfg(cfg_path, ckpt_path): + try: + from mmdet3d.apis import init_model + from mmengine.config import Config + except: + print("MMDetection3D is not installed. Please install MMDetection3D to use this function.") + cfg = Config.fromfile(cfg_path) + model = init_model(cfg, ckpt_path) + return model + +def build_opc_model_from_cfg(cfg_path): + try: + from pcdet.config import cfg, cfg_from_yaml_file + from pcdet.datasets import build_dataloader + from pcdet.models import build_network + except Exception as e: + print(e) + raise ImportError("Failed to import OpenPCDet") + cfg_from_yaml_file(cfg_path, cfg) + test_set, test_loader, sampler = build_dataloader( + dataset_cfg=cfg.DATA_CONFIG, + class_names=cfg.CLASS_NAMES, + batch_size=1, + dist=False, + training=False, + logger=logging.Logger("Build Dataloader"), + ) + + model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=test_set) + return model + +# Allow use the API to convert based on a passed in model. 
+def convert_model_weights(ckpt_before, ckpt_after, model, legacy=False): + + model_modules = {} + for key, value in model.named_modules(): + model_modules[key] = value + + cp_old = torch.load(ckpt_before, map_location="cpu") + try: + opc = False + old_state_dict = cp_old["state_dict"] + except: + opc = True + old_state_dict = cp_old["model_state"] + + new_model = dict() + + for state_dict_key in old_state_dict.keys(): + is_sparseconv_weight = False + if state_dict_key.endswith(".weight"): + if state_dict_key[:-len(".weight")] in model_modules.keys(): + if isinstance(model_modules[state_dict_key[:-len(".weight")]], (spconv.SparseConv3d, spconv.SubMConv3d, spconv.SparseInverseConv3d)): + is_sparseconv_weight = True + + if is_sparseconv_weight: + # print(f"{state_dict_key} is a sparseconv weight") + pass + + if is_sparseconv_weight: + if len(old_state_dict[state_dict_key].shape) == 5: + if legacy: + new_key, converted_weights = convert_weights_v1(state_dict_key, old_state_dict) + else: + new_key, converted_weights = convert_weights_v2(state_dict_key, old_state_dict) + else: + new_key = state_dict_key + converted_weights = old_state_dict[state_dict_key] + + new_model[new_key] = converted_weights + + if opc: + cp_old["model_state"] = new_model + else: + cp_old["state_dict"] = new_model + torch.save(cp_old, ckpt_after) + + +def convert_weights_cmd(): + """Convert the weights of a model from SpConv to TorchSparse. + + :param ckpt_before: Path to the SpConv checkpoint + :type ckpt_before: str + :param ckpt_after: Path to the output folder of the converted checkpoint. + :type ckpt_after: str + :param v_spconv: SpConv version used for the weights. Can be one of 1 or 2, defaults to "1" + :type v_spconv: str, optional + :param framework: From which framework does the model weight comes from, choose one of mmdet3d or openpc, defaults to "mmdet3d" + :type framework: str, optional + """ + # ckpt_before, ckpt_after, v_spconv="1", framework="mmdet3d" + + # argument parser + parser = argparse.ArgumentParser(description="Convert SpConv model to TorchSparse model") + parser.add_argument("--ckpt_before", help="Path to the SpConv checkpoint") + parser.add_argument("--ckpt_after", help="Path to the output folder of the converted checkpoint.") + parser.add_argument("--cfg_path", help="Path to the config file of the model") + parser.add_argument("--v_spconv", default="1", help="SpConv version used for the weights. Can be one of 1 or 2") + parser.add_argument("--framework", default="mmdet3d", help="From which framework does the model weight comes from, choose one of mmdet3d or openpc") + args = parser.parse_args() + + # Check the plugin argument + assert args.framework in ['mmdet3d', 'openpc'], "plugin argument can only be mmdet3d or openpcdet" + assert args.v_spconv in ['1', '2'], "v_spconv argument can only be 1 or 2" + + legacy = True if args.v_spconv == "1" else False + cfg_path = args.cfg_path + + model = build_mmdet_model_from_cfg(cfg_path, args.ckpt_before) if args.framework == "mmdet3d" else build_opc_model_from_cfg(cfg_path) + convert_model_weights( + ckpt_before=args.ckpt_before, + ckpt_after=args.ckpt_after, + model=model, + legacy=legacy) + + +def convert_weights(ckpt_before: str, ckpt_after: str, cfg_path: str, v_spconv: int = 1, framework: str = "mmdet3d"): + """Convert the weights of a model from SpConv to TorchSparse. 
+
+    :param ckpt_before: Path to the SpConv checkpoint.
+    :type ckpt_before: str
+    :param ckpt_after: Output path for the converted TorchSparse checkpoint.
+    :type ckpt_after: str
+    :param cfg_path: Path to the config file of the model to be converted.
+    :type cfg_path: str
+    :param v_spconv: SpConv version used for the weights (1 or 2), defaults to 1
+    :type v_spconv: int, optional
+    :param framework: Framework the checkpoint comes from, either "mmdet3d" or "openpc", defaults to "mmdet3d"
+    :type framework: str, optional
+    """
+
+    # Check the framework and version arguments
+    assert framework in ['mmdet3d', 'openpc'], "framework argument can only be mmdet3d or openpc"
+    assert v_spconv in [1, 2], "v_spconv argument can only be 1 or 2"
+
+    legacy = True if v_spconv == 1 else False
+
+    model = build_mmdet_model_from_cfg(cfg_path, ckpt_before) if framework == "mmdet3d" else build_opc_model_from_cfg(cfg_path)
+    convert_model_weights(
+        ckpt_before=ckpt_before,
+        ckpt_after=ckpt_after,
+        model=model,
+        legacy=legacy)
+
+
+if __name__ == "__main__":
+    convert_weights_cmd()
+    print("Conversion completed")
diff --git a/examples/mmdetection3d/README.md b/examples/mmdetection3d/README.md new file mode 100644 index 0000000..72b68de --- /dev/null +++ b/examples/mmdetection3d/README.md @@ -0,0 +1,79 @@
+# TorchSparse for MMDetection3D Plugin Demo
+
+This tutorial demonstrates how to evaluate TorchSparse-integrated MMDetection3D models. Follow the steps below to install dependencies, configure paths, convert model weights, and run the demo.
+
+## Dependencies
+
+1. **MMDetection3D Installation**: Follow the [MMDetection3D documentation](https://mmdetection3d.readthedocs.io/en/latest/get_started.html).
+2. **Dataset Preparation**: Pre-process the datasets as described [here](https://mmdetection3d.readthedocs.io/en/latest/user_guides/dataset_prepare.html).
+3. **TorchSparse Installation**: Install [TorchSparse](https://github.com/mit-han-lab/torchsparse).
+4. **Install TorchSparse Plugin for MMDetection3D**:
+   1. Clone this repository.
+   2. Navigate to `examples/mmdetection3d` and run `pip install -v -e .`.
+
+## Notes
+
+- For model evaluation, change the data root in the original MMDetection3D model config to the full path of the corresponding dataset root.
+
+## Steps
+
+1. Install the dependencies.
+2. Specify the base paths and model registry.
+3. **IMPORTANT:** Activate the plugin: in `mmdetection3d/tools/test.py`, add `import ts_plugin` as the last import statement.
+4. Run the evaluation.
+
+## Supported Models
+
+- SECOND
+- PV-RCNN
+- CenterPoint
+- Part-A2
+
+## Convert Module Weights
+The dimensions of TorchSparse differ from SpConv, so parameter dimension conversion is required. You can use `convert_weights_cmd()` in converter.py as a command line tool or use `convert_weights()` as an API. Both take the same five parameters:
+
+1. `ckpt_before`: Path to the input SpConv checkpoint file.
+2. `ckpt_after`: Path where the converted TorchSparse checkpoint will be saved.
+3. `cfg_path`: Path to the MMDetection3D configuration file of the model.
+4. `v_spconv`: Version of SpConv used in the original model (1 or 2).
+5. `framework`: Choose between `'openpc'` and `'mmdet3d'`, defaults to `'mmdet3d'`.
+
+These parameters let the converter locate the input model, specify the output location, understand the model's architecture, and apply the appropriate conversion method to each sparse convolution layer.
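+
+For intuition, the core layout change is sketched below for a single 3x3x3 kernel, following the SpConv v2 branch of `examples/converter.py`; the channel sizes are arbitrary and only for illustration:
+```python
+import torch
+
+oc, k, ic = 64, 3, 32                    # output channels, kernel size, input channels
+spconv_w = torch.randn(oc, k, k, k, ic)  # SpConv v2 stores weights as (oc, kx, ky, kz, ic)
+
+kernel = spconv_w.reshape(oc, -1, ic)    # (oc, 27, ic)
+kernel = kernel.permute(1, 0, 2)         # (27, oc, ic)
+# converter.py also reorders the 27 kernel offsets at this point, since SpConv and
+# TorchSparse enumerate the 3x3x3 neighborhood in different orders.
+kernel = kernel.permute(0, 2, 1)         # (27, ic, oc): the TorchSparse ".kernel" layout
+print(kernel.shape)                      # torch.Size([27, 32, 64])
+```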
+
+Example conversion commands:
+```bash
+python examples/converter.py --ckpt_before ../mmdetection3d/models/PV-RCNN/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth --cfg_path ../mmdetection3d/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py --ckpt_after ./converted/PV-RCNN/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth --v_spconv 1 --framework mmdet3d
+```
+
+
+# Run a demo
+In your Conda environment, run:
+```bash
+python <test_file_path> <cfg_path> <torchsparse_model_path> <cfg_options> --task lidar_det
+```
+
+- `test_file_path`: The `tools/test.py` file in the mmdet3d repository.
+- `cfg_path`: The path to the mmdet3d model config for your model.
+- `torchsparse_model_path`: The path to the converted TorchSparse model checkpoint.
+- `cfg_options`: The plugin uses MMDet3D's `--cfg-options` mechanism to swap certain model layers for the plugin layers. `cfg_options` examples are below:
+
+## SECOND
+`cfg_options`:
+```bash
+"--cfg-options test_evaluator.pklfile_prefix=outputs/torchsparse/second --cfg-options model.middle_encoder.type=SparseEncoderTS"
+```
+
+## PV-RCNN
+`cfg_options`:
+```bash
+"--cfg-options test_evaluator.pklfile_prefix=outputs/torchsparse/pv_rcnn --cfg-options model.middle_encoder.type=SparseEncoderTS --cfg-options model.points_encoder.type=VoxelSetAbstractionTS"
+```
+
+## CenterPoint Voxel 0.1 Circular NMS
+
+Update the path of the NuScenes dataset in the MMDetection3D dataset config `configs/_base_/datasets/nus-3d.py`.
+
+`cfg_options`:
+```bash
+"--cfg-options model.pts_middle_encoder.type=SparseEncoderTS"
+```
\ No newline at end of file
diff --git a/examples/mmdetection3d/configs/README.md b/examples/mmdetection3d/configs/README.md new file mode 100644 index 0000000..f6c60b2 --- /dev/null +++ b/examples/mmdetection3d/configs/README.md @@ -0,0 +1 @@
+This folder contains the configs to carry out the demo in MMDetection3D.
\ No newline at end of file
diff --git a/examples/mmdetection3d/converted_models/README.md b/examples/mmdetection3d/converted_models/README.md new file mode 100644 index 0000000..97d7e95 --- /dev/null +++ b/examples/mmdetection3d/converted_models/README.md @@ -0,0 +1 @@
+Default model conversion base folder for the demo. Please create the relative path to each specific model under this directory.
diff --git a/examples/mmdetection3d/demo.ipynb b/examples/mmdetection3d/demo.ipynb new file mode 100644 index 0000000..fde59e9 --- /dev/null +++ b/examples/mmdetection3d/demo.ipynb @@ -0,0 +1,512 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TorchSparse for MMDetection3D Plugin Demo\n",
+    "You can run the cells below to evaluate TorchSparse-integrated MMDetection3D models. \n",
+    "\n",
+    "## Dependencies\n",
+    "- MMDetection3D installation: Please follow the [MMDetection3D documentation](https://mmdetection3d.readthedocs.io/en/latest/get_started.html). \n",
+    "- Pre-process the datasets required by MMDetection3D ([see here](https://mmdetection3d.readthedocs.io/en/latest/user_guides/dataset_prepare.html)). \n",
+    "- TorchSparse installation. \n",
+    "- Install the TorchSparse plugin for MMDetection3D:\n",
+    "  1. Clone this repository\n",
+    "  2. Go to `examples/mmdetection3d` and run `pip install -v -e .`\n",
+    "\n",
+    "## Notes\n",
+    "1. For model evaluation, you need to change the data root in the original mmdetection3d model config to the full path of the corresponding dataset root. The default is a relative path because mmdet3d expects you to run the evaluation under its repository folder. However, to run this demo, the relative path won't work, so you need to change it to the full path. \n",
+    "\n",
+    "# Steps\n",
+    "1. Install the dependencies. \n",
+    "2. Specify the base paths and model registry. \n",
+    "3. Activate the plugin: In `mmdetection3d/tools/test.py`, add `import ts_plugin` as the last import statement. \n",
+    "4. Run the demo. \n",
+    "5. Print the evaluation results. \n",
+    "\n",
+    "# List of Supported Models\n",
+    "- SECOND\n",
+    "- PV-RCNN\n",
+    "- CenterPoint\n",
+    "- PartA2\n",
+    "\n",
+    "# The Actual Part\n",
+    "## Load the Weight Conversion Module\n",
+    "The dimensions of TorchSparse differ from SpConv, so parameter dimension conversion is required to use the TorchSparse backend. The following cell loads the converter. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import importlib.util\n",
+    "import sys, os\n",
+    "from pathlib import Path\n",
+    "import subprocess\n",
+    "\n",
+    "# Define the relative path to the file\n",
+    "relative_path = \"../converter.py\"\n",
+    "file_path = Path().resolve() / relative_path\n",
+    "\n",
+    "# Add the directory containing the file to sys.path\n",
+    "sys.path.append(str(file_path.parent))\n",
+    "\n",
+    "# Load the module\n",
+    "spec = importlib.util.spec_from_file_location(\"convert_weights\", str(file_path))\n",
+    "converter = importlib.util.module_from_spec(spec)\n",
+    "spec.loader.exec_module(converter)\n",
+    "\n",
+    "converter = getattr(converter, \"convert_weights\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dummy check that the weight converter is successfully loaded. \n",
+    "print(converter)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Specify the Paths and Environment Parameters\n",
+    "To run this demo, you need to provide the following paths:\n",
+    "1. `mmdet3d_path`: MMDetection3D installation path. We need this path to find the `test.py` evaluation script. \n",
+    "2. `mmdet3d_model_base_path`: Input pretrained weight path. The pretrained weights you download from the MMDetection3D model zoo should be put under the same base folder. \n",
+    "3. `torchsparse_model_base_path`: Output pretrained weight path. The converted weights for the various models should be put under the same base folder as well. \n",
+    "4. `mmdet3d_cfg_base_path`: MMDetection3D configuration files base path. The configuration file is required for the model conversion: the converter uses the original configuration file to create a model, identify the sparse conv modules, and convert the weights for only those modules. By default, if you installed mmdet3d in development mode with `-e`, this should just be the `configs` folder in the mmdet3d repo. \n",
+    "5. Conda environment name: this demo initializes a sub-shell with `subprocess` to execute the evaluation, so you need to specify the name of the conda environment that you want to use to run the demo. \n",
+    "\n",
+    "For paths 2, 3, and 4, we expect you to organize them by having a base path and putting the checkpoint/configuration files of different models under the same base path. 
For example, for the input pertrained weight path, the file structure looks like: \n", + "\n", + "```text\n", + "mmdet_model_base_folder/ \n", + "├── SECOND/ \n", + "│ └── SECOND_Checkpoint.pth\n", + "├── PV-RCNN/\n", + "│ └── PV-RCNN_Checkpoint.pth\n", + "└── CenterPoint/\n", + " └── CenterPoint_Checkpoint.pth\n", + "```\n", + "To configure the path for SECOND demo, you need to configure the `mmdet3d_model_base_path` to the path of the folder `mmdet_model_base_folder` and in the SECOND's registry entry, set `ckpt_before` to be `SECOND/SECOND_Checkpoint.pth`. \n", + "\n", + "In addition to the paths, we also need you to specify:\n", + "1. SpConv version of the original model.\n", + "2. `cfg_options`: some modules in the model is replaced by the TorchSparse layers. When running the evaluation, you don't need to provide a new configuration file to specify the use of TorchSparse layers. You can rather use the original mmdetection3d config file but use the `cfg_options` to replace certain modules to use the TorchSparse module. Typically, only one or two modules needed to be replaced. You can see the specific usage from the exaple below. \n", + "3. Name of the conda environment the dependencies is installed. \n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "env_name = \"torchsparse\"\n", + "\n", + "# Please complete the following base paths. \n", + "base_paths = {\n", + " 'mmdet3d_path': None,\n", + " 'mmdet3d_model_base_path': None,\n", + " 'torchsparse_model_base_path': os.path.join(os.path.abspath(''), \"converted_models\"),\n", + " 'mmdet3d_cfg_base_path': None\n", + "}\n", + "\n", + "# Specify the model specific path and registry values. \n", + "# NOTE: ckpt_before is associated with the mmdet3d_model_base_path and ckpt_after is associated with the torchsparse_model_base_path. \n", + "second_3d_car = {\n", + " 'ckpt_before': 'SECOND/second_hv_secfpn_8xb6-80e_kitti-3d-car-75d9305e.pth',\n", + " 'ckpt_after': 'SECOND/second_hv_secfpn_8xb6-80e_kitti-3d-car-75d9305e.pth',\n", + " 'cfg_path': 'second/second_hv_secfpn_8xb6-80e_kitti-3d-car.py',\n", + " 'v_spconv': 2,\n", + " 'cfg_options': \"--cfg-options test_evaluator.pklfile_prefix=outputs/torchsparse/second --cfg-options model.middle_encoder.type=SparseEncoderTS\"\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'mmdet3d_path': '/home/yingqi/repo/mmdetection3d',\n", + " 'mmdet3d_model_base_path': '/home/yingqi/repo/mmdetection3d/models',\n", + " 'torchsparse_model_base_path': '/home/yingqi/repo/torchsparse-dev/examples/mmdetection3d/converted_models',\n", + " 'mmdet3d_cfg_base_path': '/home/yingqi/repo/mmdetection3d/configs'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "base_paths" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function to run a single demo is defined below. Based on the configuration dictionary you provid, it convert the model weights then use the `tools/test.py` in the `mmdetection3d` repo to run the model evaluation. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def mmdet3d_single_demo(registry_entry, base_paths, convert=True):\n", + " \"\"\"Run Single Model Demo\n", + "\n", + " :param registry_entry: the model demo registry. 
\n", + " :type registry_entry: dict\n", + " :param base_paths: the base paths. \n", + " :type base_paths: dict\n", + " :param convert: whether to convert the model. If set to false, it skip the model conversion and use the provided checkpoint to run model evaluation directly. Defaults to True.\n", + " :type convert: bool, optional\n", + " :return: return the process object that used to run the demo. \n", + " :rtype: CompletedProcess\n", + " \"\"\"\n", + "\n", + " assert os.path.isdir(base_paths['mmdet3d_path']), \"Please specify the mmdet3d_path in the base_paths.\"\n", + " assert os.path.isdir(base_paths['mmdet3d_model_base_path']), \"Please specify the mmdet3d_model_base_path in the base_paths.\"\n", + " assert os.path.isdir(base_paths['torchsparse_model_base_path']), \"Please specify the torchsparse_model_base_path in the base_paths.\"\n", + " assert os.path.isdir(base_paths['mmdet3d_cfg_base_path']), \"Please specify the mmdet3d_cfg_base_path in the base_paths.\"\n", + "\n", + " # pre-process paths\n", + " cfg_path = os.path.join(base_paths['mmdet3d_cfg_base_path'], registry_entry['cfg_path'])\n", + " test_file_path = os.path.join(base_paths['mmdet3d_path'], \"tools/test.py\")\n", + " mmdet3d_model_path = os.path.join(base_paths['mmdet3d_model_base_path'], registry_entry['ckpt_before'])\n", + " assert os.path.isdir(base_paths['torchsparse_model_base_path']), \"Please create the directory for the converted model.\"\n", + " torchsparse_model_path = os.path.join(base_paths['torchsparse_model_base_path'], registry_entry['ckpt_after'])\n", + " \n", + " cfg_options = registry_entry['cfg_options']\n", + " # convert the model\n", + " if convert:\n", + " parent_dir = os.path.dirname(torchsparse_model_path)\n", + " if not os.path.exists(parent_dir):\n", + " os.makedirs(parent_dir)\n", + " converter(\n", + " ckpt_before=mmdet3d_model_path,\n", + " ckpt_after=torchsparse_model_path,\n", + " cfg_path=cfg_path,\n", + " v_spconv = registry_entry['v_spconv']\n", + " )\n", + "\n", + " command = f'bash -c \"conda activate {env_name}; python {test_file_path} {cfg_path} {torchsparse_model_path} {cfg_options} --task lidar_det\"'\n", + " print(command)\n", + " result = subprocess.run(command, capture_output=True, text=True, shell=True, executable='/bin/bash')\n", + " return result # result have .stdout and .stderr attributes to get the output. \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate MMDetection3d Models\n", + "\n", + "### SECOND\n", + "Run a SECOND demo. You can print the evaluation results of the model from the sub-process's `stdout` and `stderr`. 
\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "second_results = mmdet3d_single_demo(second_3d_car, base_paths, convert=True)\n", + "print(second_results.stderr)\n", + "print(second_results.stdout)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Expected Output: \n", + "\n", + "```\n", + "----------- AP11 Results ------------\n", + "\n", + "Car AP11@0.70, 0.70, 0.70:\n", + "bbox AP11:95.2015, 89.6519, 88.0073\n", + "bev AP11:89.9621, 87.2725, 84.2825\n", + "3d AP11:88.3629, 78.2199, 76.0327\n", + "aos AP11:94.94, 89.08, 87.23\n", + "Car AP11@0.70, 0.50, 0.50:\n", + "bbox AP11:95.2015, 89.6519, 88.0073\n", + "bev AP11:95.3329, 89.9520, 88.7400\n", + "3d AP11:95.2805, 89.8595, 88.5336\n", + "aos AP11:94.94, 89.08, 87.23\n", + "\n", + "----------- AP40 Results ------------\n", + "\n", + "Car AP40@0.70, 0.70, 0.70:\n", + "bbox AP40:97.4063, 92.4550, 89.2481\n", + "bev AP40:92.6387, 88.4049, 85.2355\n", + "3d AP40:90.4511, 81.3433, 76.1927\n", + "aos AP40:97.13, 91.81, 88.42\n", + "Car AP40@0.70, 0.50, 0.50:\n", + "bbox AP40:97.4063, 92.4550, 89.2481\n", + "bev AP40:97.5160, 94.7415, 91.7295\n", + "3d AP40:97.3701, 94.5687, 91.4920\n", + "aos AP40:97.13, 91.81, 88.42\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PV-RCNN\n", + "Run a PV-RCNN Demo." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# PV-RCNN Registry\n", + "pv_rcnn_config = {\n", + " \"ckpt_before\": \"PV-RCNN/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth\",\n", + " \"ckpt_after\": \"PV-RCNN/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth\",\n", + " \"cfg_path\": \"pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py\",\n", + " \"v_spconv\": 1,\n", + " \"cfg_options\": \"--cfg-options test_evaluator.pklfile_prefix=outputs/torchsparse/pv_rcnn --cfg-options model.middle_encoder.type=SparseEncoderTS --cfg-options model.points_encoder.type=VoxelSetAbstractionTS\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pv_rcnn_results = mmdet3d_single_demo(pv_rcnn_config, base_paths, convert=True)\n", + "print(pv_rcnn_results.stderr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(pv_rcnn_results.stdout)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Expected Output: \n", + "\n", + "```\n", + "----------- AP11 Results ------------\n", + "\n", + "Pedestrian AP11@0.50, 0.50, 0.50:\n", + "bbox AP11:74.1319, 68.4703, 65.9149\n", + "bev AP11:68.2026, 62.7491, 57.1043\n", + "3d AP11:66.6080, 59.7569, 55.1617\n", + "aos AP11:68.98, 63.64, 60.68\n", + "Pedestrian AP11@0.50, 0.25, 0.25:\n", + "bbox AP11:74.1319, 68.4703, 65.9149\n", + "bev AP11:80.5667, 76.2589, 72.7974\n", + "3d AP11:80.3568, 76.0134, 72.2977\n", + "aos AP11:68.98, 63.64, 60.68\n", + "Cyclist AP11@0.50, 0.50, 0.50:\n", + "bbox AP11:89.2310, 82.0387, 77.1643\n", + "bev AP11:87.8058, 74.9448, 70.5274\n", + "3d AP11:87.2027, 73.2608, 69.6121\n", + "aos AP11:89.13, 81.69, 76.77\n", + "Cyclist AP11@0.50, 0.25, 0.25:\n", + "bbox AP11:89.2310, 82.0387, 77.1643\n", + "bev AP11:88.6302, 80.1792, 74.8060\n", + "3d AP11:88.6302, 80.1792, 74.8060\n", + "aos AP11:89.13, 81.69, 76.77\n", + "Car AP11@0.70, 0.70, 0.70:\n", + "bbox AP11:96.0265, 89.5369, 89.1852\n", + "bev AP11:90.1265, 88.0958, 
87.6436\n", + "3d AP11:89.2321, 83.7058, 78.7935\n", + "aos AP11:95.98, 89.43, 89.03\n", + "Car AP11@0.70, 0.50, 0.50:\n", + "bbox AP11:96.0265, 89.5369, 89.1852\n", + "bev AP11:96.1496, 94.8182, 89.2712\n", + "3d AP11:96.0921, 89.5371, 89.2317\n", + "aos AP11:95.98, 89.43, 89.03\n", + "\n", + "Overall AP11@easy, moderate, hard:\n", + "bbox AP11:86.4631, 80.0153, 77.4215\n", + "bev AP11:82.0450, 75.2632, 71.7584\n", + "3d AP11:81.0143, 72.2412, 67.8558\n", + "aos AP11:84.70, 78.25, 75.49\n", + "\n", + "----------- AP40 Results ------------\n", + "\n", + "Pedestrian AP40@0.50, 0.50, 0.50:\n", + "bbox AP40:75.6494, 69.7741, 66.0890\n", + "bev AP40:69.5448, 62.1173, 57.1881\n", + "3d AP40:66.6659, 59.2055, 54.1700\n", + "aos AP40:70.00, 64.19, 60.31\n", + "Pedestrian AP40@0.50, 0.25, 0.25:\n", + "bbox AP40:75.6494, 69.7741, 66.0890\n", + "bev AP40:82.8723, 78.0379, 73.2982\n", + "3d AP40:82.6538, 77.1948, 72.8713\n", + "aos AP40:70.00, 64.19, 60.31\n", + "Cyclist AP40@0.50, 0.50, 0.50:\n", + "bbox AP40:93.8638, 84.2218, 80.1001\n", + "bev AP40:92.8451, 75.6214, 71.7649\n", + "3d AP40:90.3880, 73.2361, 69.4116\n", + "aos AP40:93.71, 83.84, 79.61\n", + "Cyclist AP40@0.50, 0.25, 0.25:\n", + "bbox AP40:93.8638, 84.2218, 80.1001\n", + "bev AP40:93.9661, 81.6019, 77.2742\n", + "3d AP40:93.9661, 81.6019, 77.2742\n", + "aos AP40:93.71, 83.84, 79.61\n", + "Car AP40@0.70, 0.70, 0.70:\n", + "bbox AP40:97.8348, 94.5482, 94.0081\n", + "bev AP40:94.4796, 90.7830, 88.6291\n", + "3d AP40:91.8635, 84.5625, 82.4022\n", + "aos AP40:97.80, 94.41, 93.80\n", + "Car AP40@0.70, 0.50, 0.50:\n", + "bbox AP40:97.8348, 94.5482, 94.0081\n", + "bev AP40:97.9316, 96.4609, 94.4074\n", + "3d AP40:97.8820, 94.6416, 94.3069\n", + "aos AP40:97.80, 94.41, 93.80\n", + "\n", + "Overall AP40@easy, moderate, hard:\n", + "bbox AP40:89.1160, 82.8480, 80.0657\n", + "bev AP40:85.6232, 76.1739, 72.5274\n", + "3d AP40:82.9724, 72.3347, 68.6612\n", + "aos AP40:87.17, 80.81, 77.91\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CenterPoint Voxel 0.1 Circular NMS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the first NuScenes model in the demo. Please remember to update the path of the NuScenes dataset in the mmdetection3d dataset config `configs/_base_/datasets/nus-3d.py`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "centerpoint_config = {\n", + " \"ckpt_before\": \"CenterPoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20220810_030004-9061688e.pth\",\n", + " \"ckpt_after\": \"CenterPoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20220810_030004-9061688e.pth\",\n", + " \"cfg_path\": \"centerpoint/centerpoint_voxel01_second_secfpn_head-circlenms_8xb4-cyclic-20e_nus-3d.py\",\n", + " \"v_spconv\": 1,\n", + " \"cfg_options\": \"--cfg-options model.pts_middle_encoder.type=SparseEncoderTS\"\n", + "}\n", + "\n", + "centerpoint_results = mmdet3d_single_demo(centerpoint_config, base_paths, convert=True)\n", + "print(centerpoint_results.stderr)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(centerpoint_results.stdout)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(centerpoint_results.stdout)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Expected Outputs: \n", + "\n", + "```\n", + "Evaluating bboxes of pred_instances_3d\n", + "mAP: 0.5544\n", + "mATE: 0.2988\n", + "mASE: 0.2538\n", + "mAOE: 0.3110\n", + "mAVE: 0.3039\n", + "mAAE: 0.1977\n", + "NDS: 0.6407\n", + "Eval time: 53.4s\n", + "\n", + "Per-class results:\n", + "Object Class\tAP\tATE\tASE\tAOE\tAVE\tAAE\n", + "car\t0.845\t0.186\t0.152\t0.113\t0.304\t0.194\n", + "truck\t0.522\t0.324\t0.185\t0.126\t0.283\t0.240\n", + "bus\t0.667\t0.354\t0.181\t0.062\t0.535\t0.268\n", + "trailer\t0.362\t0.546\t0.207\t0.447\t0.208\t0.164\n", + "construction_vehicle\t0.160\t0.639\t0.414\t0.858\t0.117\t0.334\n", + "pedestrian\t0.827\t0.165\t0.276\t0.410\t0.244\t0.101\n", + "motorcycle\t0.529\t0.213\t0.237\t0.292\t0.511\t0.264\n", + "bicycle\t0.341\t0.169\t0.268\t0.421\t0.229\t0.016\n", + "traffic_cone\t0.638\t0.162\t0.342\tnan\tnan\tnan\n", + "barrier\t0.653\t0.230\t0.277\t0.070\tnan\tnan\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "torchsparse", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/mmdetection3d/scripts/run_evaluation/SECOND.sh b/examples/mmdetection3d/scripts/run_evaluation/SECOND.sh new file mode 100755 index 0000000..8f87052 --- /dev/null +++ b/examples/mmdetection3d/scripts/run_evaluation/SECOND.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +export MMDET3D_HOME="/home/yingqi/repo/mmdetection3d" && python ${MMDET3D_HOME}/tools/test.py ${MMDET3D_HOME}/configs/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class.py /home/ioeddk/GitHub/torchsparse-dev/examples/mmdetection3d/pretrained_models/backup/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class-b086d0a3-converted.pth --cfg-options test_evaluator.pklfile_prefix=outputs/torchsparse/second --cfg-options model.middle_encoder.type=SparseEncoderTS --task lidar_det \ No newline at end of file diff --git a/examples/mmdetection3d/setup.py b/examples/mmdetection3d/setup.py new file mode 100644 index 0000000..e81bda7 --- /dev/null +++ 
b/examples/mmdetection3d/setup.py @@ -0,0 +1,7 @@ +from setuptools import setup, find_packages + +setup( + name='ts_plugin', + version='0.1', + packages=find_packages(), +) diff --git a/examples/mmdetection3d/ts_plugin/__init__.py b/examples/mmdetection3d/ts_plugin/__init__.py new file mode 100644 index 0000000..a076973 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/__init__.py @@ -0,0 +1,7 @@ +from .models import * + +from mmengine.registry import MODELS + +from torchsparse.nn import BatchNorm + +MODELS.register_module('TorchSparseBatchNorm', force=True, module=BatchNorm) \ No newline at end of file diff --git a/examples/mmdetection3d/ts_plugin/models/__init__.py b/examples/mmdetection3d/ts_plugin/models/__init__.py new file mode 100644 index 0000000..e3b0711 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/__init__.py @@ -0,0 +1,4 @@ +from .layers import * +from .middle_encoders import * +from .roi_heads.bbox_heads import * +from .backbones import * \ No newline at end of file diff --git a/examples/mmdetection3d/ts_plugin/models/backbones/__init__.py b/examples/mmdetection3d/ts_plugin/models/backbones/__init__.py new file mode 100644 index 0000000..fc5ae4f --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/backbones/__init__.py @@ -0,0 +1 @@ +from .resnet import BasicBlockTS \ No newline at end of file diff --git a/examples/mmdetection3d/ts_plugin/models/backbones/resnet.py b/examples/mmdetection3d/ts_plugin/models/backbones/resnet.py new file mode 100644 index 0000000..5ac8f27 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/backbones/resnet.py @@ -0,0 +1,679 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer +from mmengine.model import BaseModule +from torch.nn.modules.batchnorm import _BatchNorm + +from mmdet.registry import MODELS +from mmdet.models.layers import ResLayer + +import os +from torchsparse.nn import ReLU as TSReLU + +# Inherite BascBlock and change attribute + +class BasicBlockTS(BaseModule): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + init_cfg=None): + super(BasicBlockTS, self).__init__(init_cfg) + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' 
+ + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + + self.relu = TSReLU() + + + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +class Bottleneck(BaseModule): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + init_cfg=None): + """Bottleneck block for ResNet. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if + it is "caffe", the stride-two layer is the first 1x1 conv layer. 
+ """ + super(Bottleneck, self).__init__(init_cfg) + assert style in ['pytorch', 'caffe'] + assert dcn is None or isinstance(dcn, dict) + assert plugins is None or isinstance(plugins, list) + if plugins is not None: + allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] + assert all(p['position'] in allowed_position for p in plugins) + + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.dilation = dilation + self.style = style + self.with_cp = with_cp + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dcn = dcn + self.with_dcn = dcn is not None + self.plugins = plugins + self.with_plugins = plugins is not None + + if self.with_plugins: + # collect plugins for conv1/conv2/conv3 + self.after_conv1_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv1' + ] + self.after_conv2_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv2' + ] + self.after_conv3_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv3' + ] + + if self.style == 'pytorch': + self.conv1_stride = 1 + self.conv2_stride = stride + else: + self.conv1_stride = stride + self.conv2_stride = 1 + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + norm_cfg, planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + conv_cfg, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + dcn, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + conv_cfg, + planes, + planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + if self.with_plugins: + self.after_conv1_plugin_names = self.make_block_plugins( + planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins( + planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins( + planes * self.expansion, self.after_conv3_plugins) + + def make_block_plugins(self, in_channels, plugins): + """make plugins for block. + + Args: + in_channels (int): Input channels of plugin. + plugins (list[dict]): List of plugins cfg to build. + + Returns: + list[str]: List of the names of plugin. 
+ """ + assert isinstance(plugins, list) + plugin_names = [] + for plugin in plugins: + plugin = plugin.copy() + name, layer = build_plugin_layer( + plugin, + in_channels=in_channels, + postfix=plugin.pop('postfix', '')) + assert not hasattr(self, name), f'duplicate plugin {name}' + self.add_module(name, layer) + plugin_names.append(name) + return plugin_names + + def forward_plugin(self, x, plugin_names): + out = x + for name in plugin_names: + out = getattr(self, name)(out) + return out + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + @property + def norm3(self): + """nn.Module: normalization layer after the third convolution layer""" + return getattr(self, self.norm3_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@MODELS.register_module() +class ResNetTS(BaseModule): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + stem_channels (int | None): Number of stem channels. If not specified, + it will be the same as `base_channels`. Default: None. + base_channels (int): Number of base channels of res layer. Default: 64. + in_channels (int): Number of input image channels. Default: 3. + num_stages (int): Resnet stages. Default: 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + norm_cfg (dict): Dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + - position (str, required): Position inside block to insert + plugin, options are 'after_conv1', 'after_conv2', 'after_conv3'. + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages'. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. 
+ zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. + pretrained (str, optional): model pretrained path. Default: None + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + + Example: + >>> from mmdet.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 18: (BasicBlockTS, (2, 2, 2, 2)), + 34: (BasicBlockTS, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + in_channels=3, + stem_channels=None, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + with_cp=False, + zero_init_residual=True, + pretrained=None, + init_cfg=None): + super(ResNet, self).__init__(init_cfg) + self.zero_init_residual = zero_init_residual + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + + block_init_cfg = None + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be specified at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + block = self.arch_settings[depth][0] + if self.zero_init_residual: + if block is BasicBlock: + block_init_cfg = dict( + type='Constant', + val=0, + override=dict(name='norm2')) + elif block is Bottleneck: + block_init_cfg = dict( + type='Constant', + val=0, + override=dict(name='norm3')) + else: + raise TypeError('pretrained must be a str or None') + + self.depth = depth + if stem_channels is None: + stem_channels = base_channels + self.stem_channels = stem_channels + self.base_channels = base_channels + self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.block, stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + 
stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + planes = base_channels * 2**i + res_layer = self.make_res_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins, + init_cfg=block_init_cfg) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i + 1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2**( + len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """Make plugins for ResNet ``stage_idx`` th stage. + + Currently we support to insert ``context_block``, + ``empirical_attention_block``, ``nonlocal_block`` into the backbone + like ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be: + + Examples: + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose ``stage_idx=0``, the structure of blocks in the stage would be: + + .. code-block:: none + + conv1-> conv2->conv3->yyy->zzz1->zzz2 + + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + + .. code-block:: none + + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. 
+ stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True)) + else: + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, stem_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(ResNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@MODELS.register_module() +class ResNetV1dTS(ResNetTS): + r"""ResNetV1d variant described in `Bag of Tricks + `_. + + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. 
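+
+ Example (illustrative sketch, mirroring the ``ResNet`` doctest above;
+ shapes assume the default 4-stage layout):
+ >>> import torch
+ >>> self = ResNetV1dTS(depth=18)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> len(level_outputs)  # one feature map per entry in out_indices
+ 4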
+ """ + + def __init__(self, **kwargs): + super(ResNetV1dTS, self).__init__( + deep_stem=True, avg_down=True, **kwargs) diff --git a/examples/mmdetection3d/ts_plugin/models/layers/__init__.py b/examples/mmdetection3d/ts_plugin/models/layers/__init__.py new file mode 100644 index 0000000..b5cd973 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/layers/__init__.py @@ -0,0 +1 @@ +from .sparse_block import SparseBasicBlockTS, replace_feature_ts, make_sparse_convmodule_ts diff --git a/examples/mmdetection3d/ts_plugin/models/layers/sparse_block.py b/examples/mmdetection3d/ts_plugin/models/layers/sparse_block.py new file mode 100644 index 0000000..1717a0f --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/layers/sparse_block.py @@ -0,0 +1,137 @@ +from torch import nn +import torchsparse.nn as spnn + +from ..backbones.resnet import BasicBlockTS +from mmcv.cnn import build_conv_layer, build_norm_layer + +import logging + +def replace_feature_ts(out, new_features): + out.feats = new_features + return out + + +class SparseBasicBlockTS(BasicBlockTS): + """Sparse basic block for PartA^2. + + Sparse basic block implemented with submanifold sparse convolution. + + Args: + inplanes (int): Inplanes of block. + planes (int): Planes of block. + stride (int or Tuple[int]): Stride of the first block. Defaults to 1. + downsample (Module, optional): Down sample module for block. + Defaults to None. + indice_key (str): Indice key for spconv. Default to None. + conv_cfg (:obj:`ConfigDict` or dict, optional): Config dict for + convolution layer. Defaults to None. + norm_cfg (:obj:`ConfigDict` or dict, optional): Config dict for + normalization layer. Defaults to None. + """ + + expansion = 1 + + def __init__( + self, + inplanes, + planes, + stride=1, + downsample=None, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + ): + BasicBlockTS.__init__( + self, + inplanes, + planes, + stride=stride, + downsample=downsample, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + ) + if act_cfg is not None: + if act_cfg == "swish": + self.relu = spnn.SiLU(inplace=True) + else: + self.relu = spnn.ReLU(inplace=True) + + + +def make_sparse_convmodule_ts( + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + conv_type="TorchSparseConv3d", + norm_cfg=None, + order=("conv", "norm", "act"), + activation_type="relu", + indice_key=None, + transposed=False +): + """Make sparse convolution module. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of out channels. + kernel_size (int | Tuple[int]): Kernel size of convolution. + indice_key (str): The indice key used for sparse tensor. + stride (int or tuple[int]): The stride of convolution. + padding (int or tuple[int]): The padding number of input. + conv_type (str): Sparse conv type in spconv. Defaults to 'SubMConv3d'. + norm_cfg (:obj:`ConfigDict` or dict, optional): Config dict for + normalization layer. Defaults to None. + order (Tuple[str]): The order of conv/norm/activation layers. It is a + sequence of "conv", "norm" and "act". Common examples are + ("conv", "norm", "act") and ("act", "conv", "norm"). + Defaults to ('conv', 'norm', 'act'). + + Returns: + spconv.SparseSequential: sparse convolution module. 
+ """ + assert isinstance(order, tuple) and len(order) <= 3 + assert set(order) | {"conv", "norm", "act"} == {"conv", "norm", "act"} + + conv_cfg = {"type": conv_type} + + if norm_cfg is None: + norm_cfg = dict(type='BN1d') + + layers = [] + for layer in order: + if layer == "conv": + layers.append( + build_conv_layer( + cfg=conv_cfg, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=False, + transposed=transposed, + ) + # spnn.Conv3d( + # in_channels=in_channels, + # out_channels=out_channels, + # kernel_size=kernel_size, + # stride=stride, + # padding=padding, + # bias=False, + # transposed=transposed) + ) + elif layer == "norm": + assert norm_cfg is not None, "norm_cfg must be provided" + layers.append(build_norm_layer(norm_cfg, out_channels)[1]) + elif layer == "act": + if activation_type == "relu": + layers.append(spnn.ReLU(inplace=True)) + elif activation_type == "swish": + layers.append(spnn.SiLU(inplace=True)) + else: + raise NotImplementedError + layers = nn.Sequential(*layers) + logging.info("Made TorchSparse Module") + return layers \ No newline at end of file diff --git a/examples/mmdetection3d/ts_plugin/models/middle_encoders/__init__.py b/examples/mmdetection3d/ts_plugin/models/middle_encoders/__init__.py new file mode 100644 index 0000000..5ebcec9 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/middle_encoders/__init__.py @@ -0,0 +1,3 @@ +from .sparse_encoder import SparseEncoderTS +from .voxel_set_abstraction import VoxelSetAbstractionTS +from .sparse_unet import SparseUNetTS \ No newline at end of file diff --git a/examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_encoder.py b/examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_encoder.py new file mode 100644 index 0000000..6814d94 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_encoder.py @@ -0,0 +1,248 @@ +from typing import Dict, List, Optional, Tuple, Union, Sequence, Mapping, Any +from functools import partial + +import torch.nn as nn + +from mmdet3d.registry import MODELS +from mmengine.runner import amp + +import torchsparse +from torchsparse.nn import functional as F +F.set_conv_mode(1) + +import os, logging + +from ..layers import SparseBasicBlockTS, make_sparse_convmodule_ts + +@MODELS.register_module("SparseEncoderTS") +class SparseEncoderTS(nn.Module): + r"""Sparse encoder for SECOND and Part-A2. + + Args: + in_channels (int): The number of input channels. + sparse_shape (list[int]): The sparse shape of input tensor. + order (list[str], optional): Order of conv module. + Defaults to ('conv', 'norm', 'act'). + norm_cfg (dict, optional): Config of normalization layer. Defaults to + dict(type='BN1d', eps=1e-3, momentum=0.01). + base_channels (int, optional): Out channels for conv_input layer. + Defaults to 16. + output_channels (int, optional): Out channels for conv_out layer. + Defaults to 128. + encoder_channels (tuple[tuple[int]], optional): + Convolutional channels of each encode block. + encoder_paddings (tuple[tuple[int]], optional): + Paddings of each encode block. + Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)). + block_type (str, optional): Type of the block to use. + Defaults to 'conv_module'. 
+ """ + + DEFAULT_CONV_CFG = {"type": "TorchSparseConv3d"} + DEFAULT_NORM_CFG = {"type": "TorchSparseBatchNorm", "eps": 1e-3, "momentum": 0.01} + + def __init__( + self, + in_channels: int, + sparse_shape, + order: Sequence[str] = ("conv", "norm", "act"), + norm_cfg: Mapping[str, Any] = DEFAULT_NORM_CFG, + base_channels: int = 16, + output_channels: int = 128, + encoder_channels=((16,), (32, 32, 32), (64, 64, 64), (64, 64, 64)), + encoder_paddings=((1,), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)), + block_type: str = "conv_module", + activation_type: str = "relu", + return_middle_feats: bool = False + ) -> None: + super().__init__() + assert block_type in ["conv_module", "basicblock"] + self.sparse_shape = sparse_shape + self.in_channels = in_channels + self.order = order + self.base_channels = base_channels + self.output_channels = output_channels + self.encoder_channels = encoder_channels + self.encoder_paddings = encoder_paddings + self.stage_num = len(self.encoder_channels) + self.fp16_enabled = False + self.activation_type = activation_type + self.return_middle_feats = return_middle_feats + # Spconv init all weight on its own + + assert isinstance(order, (list, tuple)) and len(order) == 3 + assert set(order) == {"conv", "norm", "act"} + + make_block_fn = partial(make_sparse_convmodule_ts, activation_type=activation_type) + + if self.order[0] != "conv": # pre activate + self.conv_input = make_block_fn( + in_channels, + self.base_channels, + 3, + norm_cfg=norm_cfg, + padding=1, + # indice_key="subm1", + conv_type="TorchSparseConv3d", + order=("conv",), + ) + else: # post activate + self.conv_input = make_block_fn( + in_channels, + self.base_channels, + 3, + norm_cfg=norm_cfg, + padding=1, + # indice_key="subm1", + conv_type="TorchSparseConv3d", + ) + + encoder_out_channels = self.make_encoder_layers( + make_block_fn, norm_cfg, self.base_channels, block_type=block_type # make_block_fn is a function to passed in to make blocks + ) + + self.conv_out = make_block_fn( + encoder_out_channels, + self.output_channels, + kernel_size=(3, 1, 1), + stride=(2, 1, 1), + norm_cfg=norm_cfg, + padding=0, + # indice_key="spconv_down2", + conv_type="TorchSparseConv3d", + ) + # print("\033[92m" + "Sparse Encoder" + "\033[0m") + logging.info("Using TorchSparse SparseEncoder") + + @amp.autocast(enabled=False) + def forward(self, voxel_features, coors, batch_size, **kwargs): + """Forward of SparseEncoder. + + Args: + voxel_features (torch.float32): Voxel features in shape (N, C). + coors (torch.int32): Coordinates in shape (N, 4), + the columns in the order of (batch_idx, z_idx, y_idx, x_idx). + batch_size (int): Batch size. + + Returns: + dict: Backbone features. 
+ """ + coors = coors.int() + spatial_range = (coors[:, 0].max().item() + 1,) + tuple(self.sparse_shape) + input_sp_tensor = torchsparse.SparseTensor(voxel_features, coors, spatial_range=spatial_range) + x = self.conv_input(input_sp_tensor) + + encode_features = [] + for encoder_layer in self.encoder_layers: + x = encoder_layer(x) + encode_features.append(x) + + # for detection head + # [200, 176, 5] -> [200, 176, 2] + out = self.conv_out(encode_features[-1]) # out differs + spatial_features = out.dense() + + # TS Integrated New sequence + N, D, H, W, C = spatial_features.shape + spatial_features = spatial_features.permute(0, 2, 3, 4, 1).contiguous().reshape(N, H, W, C*D).permute(0, 3, 1, 2).contiguous() + + # SpConv Original Sequence + # N, C, D, H, W = spatial_features.shape + # spatial_features = spatial_features.view(N, C * D, H, W) + + # N, D, H, W, C = spatial_features.shape + # spatial_features = spatial_features.reshape(N, H, W, D * C).permute(0, 3, 1, 2).contiguous() + # # consistent with spconv + # # spatial_features = spatial_features.transpose(-1, -2).reshape(N, H, W, D * C).permute(0, 3, 1, 2).contiguous() + + if self.return_middle_feats: + return spatial_features, encode_features + else: + return spatial_features + + def make_encoder_layers( + self, + make_block, + norm_cfg, + in_channels, + block_type="conv_module", + conv_cfg=DEFAULT_CONV_CFG, + ): + """Make encoder layers using sparse convs. + + Args: + make_block (method): A bounded function to build blocks. + norm_cfg (dict[str]): Config of normalization layer. + in_channels (int): The number of encoder input channels. + block_type (str, optional): Type of the block to use. + Defaults to 'conv_module'. + conv_cfg (dict, optional): Config of conv layer. Defaults to + dict(type='SubMConv3d'). + + Returns: + int: The number of encoder output channels. 
+ """ + assert block_type in ["conv_module", "basicblock"] + self.encoder_layers = nn.Sequential() + + for i, blocks in enumerate(self.encoder_channels): + blocks_list = [] + for j, out_channels in enumerate(tuple(blocks)): + padding = tuple(self.encoder_paddings[i])[j] + # each stage started with a spconv layer + # except the first stage + if i != 0 and j == 0 and block_type == "conv_module": + blocks_list.append( + make_block( + in_channels, + out_channels, + 3, + norm_cfg=norm_cfg, + stride=2, + padding=padding, + # indice_key=f"spconv{i + 1}", + conv_type="TorchSparseConv3d", + ) + ) + elif block_type == "basicblock": + if j == len(blocks) - 1 and i != len(self.encoder_channels) - 1: + blocks_list.append( + make_block( + in_channels, + out_channels, + 3, + norm_cfg=norm_cfg, + stride=2, + padding=padding, + # indice_key=f"spconv{i + 1}", + conv_type="TorchSparseConv3d", + ) + ) + else: + blocks_list.append( + SparseBasicBlockTS( + out_channels, + out_channels, + norm_cfg=norm_cfg, + conv_cfg=conv_cfg, + act_cfg=self.activation_type, + ) + ) + else: + blocks_list.append( + make_block( + in_channels, + out_channels, + 3, + norm_cfg=norm_cfg, + padding=padding, + # indice_key=f"subm{i + 1}", + conv_type="TorchSparseConv3d", + ) + ) + in_channels = out_channels + stage_name = f"encoder_layer{i + 1}" + # stage_layers = spconv.SparseSequential(*blocks_list) + stage_layers = nn.Sequential(*blocks_list) + self.encoder_layers.add_module(stage_name, stage_layers) + return out_channels diff --git a/examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_unet.py b/examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_unet.py new file mode 100644 index 0000000..16817a7 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/middle_encoders/sparse_unet.py @@ -0,0 +1,335 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, List, Optional, Tuple + +import torch +from torch import Tensor, nn + +from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE +from mmdet3d.models.layers.torchsparse import IS_TORCHSPARSE_AVAILABLE + +if IS_SPCONV2_AVAILABLE: + from spconv.pytorch import SparseConvTensor, SparseSequential +else: + from mmcv.ops import SparseConvTensor, SparseSequential + +if IS_TORCHSPARSE_AVAILABLE: + import torchsparse +else: + raise Exception("No TorchSparse Available") + +from mmengine.model import BaseModule + +from ..layers.sparse_block import SparseBasicBlockTS, make_sparse_convmodule_ts +from ..layers.sparse_block import replace_feature_ts +from mmdet3d.registry import MODELS + +import logging, os + +TwoTupleIntType = Tuple[Tuple[int]] + +savepath = os.environ.get("PT_SAVE_PATH") + +@MODELS.register_module("SparseUNetTS") +class SparseUNetTS(BaseModule): + r"""SparseUNet for PartA^2. + + See the `paper `_ for more details. + + Args: + in_channels (int): The number of input channels. + sparse_shape (list[int]): The sparse shape of input tensor. + norm_cfg (dict): Config of normalization layer. + base_channels (int): Out channels for conv_input layer. + output_channels (int): Out channels for conv_out layer. + encoder_channels (tuple[tuple[int]]): + Convolutional channels of each encode block. + encoder_paddings (tuple[tuple[int]]): Paddings of each encode block. + decoder_channels (tuple[tuple[int]]): + Convolutional channels of each decode block. + decoder_paddings (tuple[tuple[int]]): Paddings of each decode block. 
+ """ + + DEFAULT_CONV_CFG = {"type": "TorchSparseConv3d"} + DEFAULT_NORM_CFG = {"type": "TorchSparseBatchNorm", "eps": 1e-3, "momentum": 0.01} + + + def __init__( + self, + in_channels: int, + sparse_shape: List[int], + order: Tuple[str] = ('conv', 'norm', 'act'), + norm_cfg: dict = DEFAULT_NORM_CFG, # dict(type='BN1d', eps=1e-3, momentum=0.01), + base_channels: int = 16, + output_channels: int = 128, + encoder_channels: Optional[TwoTupleIntType] = ((16, ), (32, 32, + 32), + (64, 64, + 64), (64, 64, 64)), + encoder_paddings: Optional[TwoTupleIntType] = ((1, ), (1, 1, 1), + (1, 1, 1), + ((0, 1, 1), 1, 1)), + decoder_channels: Optional[TwoTupleIntType] = ((64, 64, + 64), (64, 64, 32), + (32, 32, + 16), (16, 16, 16)), + decoder_paddings: Optional[TwoTupleIntType] = ((1, 0), (1, 0), + (0, 0), (0, 1)), + init_cfg: bool = None): + super().__init__(init_cfg=init_cfg) + self.sparse_shape = sparse_shape + self.in_channels = in_channels + self.order = order + self.base_channels = base_channels + self.output_channels = output_channels + self.encoder_channels = encoder_channels + self.encoder_paddings = encoder_paddings + self.decoder_channels = decoder_channels + self.decoder_paddings = decoder_paddings + self.stage_num = len(self.encoder_channels) + # Spconv init all weight on its own + + assert isinstance(order, tuple) and len(order) == 3 + assert set(order) == {'conv', 'norm', 'act'} + + if self.order[0] != 'conv': # pre activate + self.conv_input = make_sparse_convmodule_ts( + in_channels, + self.base_channels, + 3, + norm_cfg=norm_cfg, + padding=1, + conv_type='TorchSparseConv3d', + order=('conv', )) + else: # post activate + self.conv_input = make_sparse_convmodule_ts( + in_channels, + self.base_channels, + 3, + norm_cfg=norm_cfg, + padding=1, + conv_type='TorchSparseConv3d',) + # import torchsparse.nn as spnn + # self.conv_input = nn.Sequential( + # spnn.Conv3d(in_channels, self.base_channels, 3, padding=1, bias=False), + # ) + + encoder_out_channels = self.make_encoder_layers( + make_sparse_convmodule_ts, norm_cfg, self.base_channels) + self.make_decoder_layers(make_sparse_convmodule_ts, norm_cfg, + encoder_out_channels) # extra + + self.conv_out = make_sparse_convmodule_ts( + encoder_out_channels, + self.output_channels, + kernel_size=(3, 1, 1), + stride=(2, 1, 1), + norm_cfg=norm_cfg, + padding=0, + indice_key='spconv_down2', + conv_type='TorchSparseConv3d') + + + def forward(self, voxel_features: Tensor, coors: Tensor, + batch_size: int) -> Dict[str, Tensor]: + """Forward of SparseUNet. + + Args: + voxel_features (torch.float32): Voxel features in shape [N, C]. + coors (torch.int32): Coordinates in shape [N, 4], + the columns in the order of (batch_idx, z_idx, y_idx, x_idx). + batch_size (int): Batch size. + + Returns: + dict[str, torch.Tensor]: Backbone features. 
+ """ + coors = coors.int() + input_sp_tensor = torchsparse.SparseTensor(voxel_features, coors, spatial_range=(coors[:, 0].max().item() + 1,) + tuple(self.sparse_shape)) + x = self.conv_input(input_sp_tensor) + + encode_features = [] + for i, encoder_layer in enumerate(self.encoder_layers): + x = encoder_layer(x) + encode_features.append(x) + + # for detection head + # [200, 176, 5] -> [200, 176, 2] + out = self.conv_out(encode_features[-1]) + + spatial_features = out.dense() + + + N, D, H, W, C = spatial_features.shape + spatial_features = spatial_features.permute(0, 2, 3, 4, 1).contiguous().reshape(N, H, W, C*D).permute(0, 3, 1, 2).contiguous() + + # for segmentation head, with output shape: + # [400, 352, 11] <- [200, 176, 5] + # [800, 704, 21] <- [400, 352, 11] + # [1600, 1408, 41] <- [800, 704, 21] + # [1600, 1408, 41] <- [1600, 1408, 41] + decode_features = [] + x = encode_features[-1] + for i in range(self.stage_num, 0, -1): + x = self.decoder_layer_forward(encode_features[i - 1], x, + getattr(self, f'lateral_layer{i}'), + getattr(self, f'merge_layer{i}'), + getattr(self, f'upsample_layer{i}')) + decode_features.append(x) + + seg_features = decode_features[-1].feats + + ret = dict( + spatial_features=spatial_features, seg_features=seg_features) + return ret + + def decoder_layer_forward( + self, x_lateral: SparseConvTensor, x_bottom: SparseConvTensor, + lateral_layer: SparseBasicBlockTS, merge_layer: SparseSequential, + upsample_layer: SparseSequential) -> SparseConvTensor: + """Forward of upsample and residual block. + + Args: + x_lateral (:obj:`SparseConvTensor`): Lateral tensor. + x_bottom (:obj:`SparseConvTensor`): Feature from bottom layer. + lateral_layer (SparseBasicBlockTS): Convolution for lateral tensor. + merge_layer (SparseSequential): Convolution for merging features. + upsample_layer (SparseSequential): Convolution for upsampling. + + Returns: + :obj:`SparseConvTensor`: Upsampled feature. + """ + x = lateral_layer(x_lateral) + x = replace_feature_ts(x, torch.cat((x_bottom.feats, x.feats), + dim=1)) + x_merge = merge_layer(x) + x = self.reduce_channel(x, x_merge.feats.shape[1]) + x = replace_feature_ts(x, x_merge.feats + x.feats) + x = upsample_layer(x) + return x + + @staticmethod + def reduce_channel(x: SparseConvTensor, + out_channels: int) -> SparseConvTensor: + """reduce channel for element-wise addition. + + Args: + x (:obj:`SparseConvTensor`): Sparse tensor, ``x.features`` + are in shape (N, C1). + out_channels (int): The number of channel after reduction. + + Returns: + :obj:`SparseConvTensor`: Channel reduced feature. + """ + features = x.feats + n, in_channels = features.shape + assert (in_channels % out_channels + == 0) and (in_channels >= out_channels) + x = replace_feature_ts(x, features.view(n, out_channels, -1).sum(dim=2)) + return x + + def make_encoder_layers(self, make_block: nn.Module, norm_cfg: dict, + in_channels: int) -> int: + """make encoder layers using sparse convs. + + Args: + make_block (method): A bounded function to build blocks. + norm_cfg (dict[str]): Config of normalization layer. + in_channels (int): The number of encoder input channels. + + Returns: + int: The number of encoder output channels. 
+ """ + self.encoder_layers = SparseSequential() + + for i, blocks in enumerate(self.encoder_channels): + blocks_list = [] + for j, out_channels in enumerate(tuple(blocks)): + padding = tuple(self.encoder_paddings[i])[j] + # each stage started with a spconv layer + # except the first stage + if i != 0 and j == 0: + blocks_list.append( + make_block( + in_channels, + out_channels, + 3, + norm_cfg=norm_cfg, + stride=2, + padding=padding, + indice_key=f'spconv{i + 1}', + conv_type='TorchSparseConv3d')) + else: + blocks_list.append( + make_block( + in_channels, + out_channels, + 3, + norm_cfg=norm_cfg, + padding=padding, + indice_key=f'subm{i + 1}', + conv_type='TorchSparseConv3d')) + in_channels = out_channels + stage_name = f'encoder_layer{i + 1}' + stage_layers = SparseSequential(*blocks_list) + self.encoder_layers.add_module(stage_name, stage_layers) + return out_channels + + def make_decoder_layers(self, make_block: nn.Module, norm_cfg: dict, + in_channels: int) -> int: + """make decoder layers using sparse convs. + + Args: + make_block (method): A bounded function to build blocks. + norm_cfg (dict[str]): Config of normalization layer. + in_channels (int): The number of encoder input channels. + + Returns: + int: The number of encoder output channels. + """ + block_num = len(self.decoder_channels) + for i, block_channels in enumerate(self.decoder_channels): + paddings = self.decoder_paddings[i] + setattr( + self, f'lateral_layer{block_num - i}', + SparseBasicBlockTS( + in_channels, + block_channels[0], + conv_cfg=dict( + type='TorchSparseConv3d'), # type='TorchSparseConv3d', indice_key=f'subm{block_num - i}'), + norm_cfg=norm_cfg)) + setattr( + self, f'merge_layer{block_num - i}', + make_block( + in_channels * 2, + block_channels[1], + 3, + norm_cfg=norm_cfg, + padding=paddings[0], + indice_key=f'subm{block_num - i}', + conv_type='TorchSparseConv3d')) + if block_num - i != 1: + setattr( + self, f'upsample_layer{block_num - i}', + make_block( + in_channels, + block_channels[2], + 3, + stride=2, + norm_cfg=norm_cfg, + indice_key=f'spconv{block_num - i}', + conv_type='TorchSparseConv3d', + transposed=True)) + else: + # use submanifold conv instead of inverse conv + # in the last block + setattr( + self, f'upsample_layer{block_num - i}', + make_block( + in_channels, + block_channels[2], + 3, + norm_cfg=norm_cfg, + padding=paddings[1], + indice_key='subm1', + conv_type='TorchSparseConv3d')) + in_channels = block_channels[2] + # print(self) diff --git a/examples/mmdetection3d/ts_plugin/models/middle_encoders/voxel_set_abstraction.py b/examples/mmdetection3d/ts_plugin/models/middle_encoders/voxel_set_abstraction.py new file mode 100644 index 0000000..3286084 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/middle_encoders/voxel_set_abstraction.py @@ -0,0 +1,334 @@ +from typing import List, Optional + +import mmengine +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmcv.ops.furthest_point_sample import furthest_point_sample +from mmengine.model import BaseModule +from torch import Tensor + +from mmdet3d.registry import MODELS +from mmdet3d.utils import InstanceList + +import os + + +def bilinear_interpolate_torch(inputs: Tensor, x: Tensor, y: Tensor) -> Tensor: + """Bilinear interpolate for inputs.""" + x0 = torch.floor(x).long() + x1 = x0 + 1 + + y0 = torch.floor(y).long() + y1 = y0 + 1 + + x0 = torch.clamp(x0, 0, inputs.shape[1] - 1) + x1 = torch.clamp(x1, 0, inputs.shape[1] - 1) + y0 = torch.clamp(y0, 0, inputs.shape[0] - 1) + y1 = torch.clamp(y1, 0, 
inputs.shape[0] - 1) + + Ia = inputs[y0, x0] + Ib = inputs[y1, x0] + Ic = inputs[y0, x1] + Id = inputs[y1, x1] + + wa = (x1.type_as(x) - x) * (y1.type_as(y) - y) + wb = (x1.type_as(x) - x) * (y - y0.type_as(y)) + wc = (x - x0.type_as(x)) * (y1.type_as(y) - y) + wd = (x - x0.type_as(x)) * (y - y0.type_as(y)) + ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t( + torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd) + return ans + +@MODELS.register_module("VoxelSetAbstractionTS") +class VoxelSetAbstractionTS(BaseModule): + """Voxel set abstraction module for PVRCNN and PVRCNN++. + + Args: + num_keypoints (int): The number of key points sampled from + raw points cloud. + fused_out_channel (int): Key points feature output channels + num after fused. Default to 128. + voxel_size (list[float]): Size of voxels. Defaults to + [0.05, 0.05, 0.1]. + point_cloud_range (list[float]): Point cloud range. Defaults to + [0, -40, -3, 70.4, 40, 1]. + voxel_sa_cfgs_list (List[dict or ConfigDict], optional): List of SA + module cfg. Used to gather key points features from multi-wise + voxel features. Default to None. + rawpoints_sa_cfgs (dict or ConfigDict, optional): SA module cfg. + Used to gather key points features from raw points. Default to + None. + bev_feat_channel (int): Bev features channels num. + Default to 256. + bev_scale_factor (int): Bev features scale factor. Default to 8. + voxel_center_as_source (bool): Whether used voxel centers as points + cloud key points. Defaults to False. + norm_cfg (dict[str]): Config of normalization layer. Default + used dict(type='BN1d', eps=1e-5, momentum=0.1). + bias (bool | str, optional): If specified as `auto`, it will be + decided by `norm_cfg`. `bias` will be set as True if + `norm_cfg` is None, otherwise False. Default: 'auto'. 
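+
+ Example (illustrative sketch with PV-RCNN-style placeholder values;
+ ``voxel_sa_cfgs_list`` and ``rawpoints_sa_cfgs`` are left as ``None``
+ for brevity, so only the BEV branch contributes features):
+ >>> vsa = VoxelSetAbstractionTS(
+ ...     num_keypoints=2048,
+ ...     fused_out_channel=128,
+ ...     bev_feat_channel=256,
+ ...     bev_scale_factor=8)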
+ """ + + def __init__(self, + num_keypoints: int, + fused_out_channel: int = 128, + voxel_size: list = [0.05, 0.05, 0.1], + point_cloud_range: list = [0, -40, -3, 70.4, 40, 1], + voxel_sa_cfgs_list: Optional[list] = None, + rawpoints_sa_cfgs: Optional[dict] = None, + bev_feat_channel: int = 256, + bev_scale_factor: int = 8, + voxel_center_as_source: bool = False, + norm_cfg: dict = dict(type='BN2d', eps=1e-5, momentum=0.1), + bias: str = 'auto') -> None: + super().__init__() + self.num_keypoints = num_keypoints + self.fused_out_channel = fused_out_channel + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.voxel_center_as_source = voxel_center_as_source + + gathered_channel = 0 + + if rawpoints_sa_cfgs is not None: + self.rawpoints_sa_layer = MODELS.build(rawpoints_sa_cfgs) + gathered_channel += sum( + [x[-1] for x in rawpoints_sa_cfgs.mlp_channels]) + else: + self.rawpoints_sa_layer = None + + if voxel_sa_cfgs_list is not None: + self.voxel_sa_configs_list = voxel_sa_cfgs_list + self.voxel_sa_layers = nn.ModuleList() + for voxel_sa_config in voxel_sa_cfgs_list: + cur_layer = MODELS.build(voxel_sa_config) + self.voxel_sa_layers.append(cur_layer) + gathered_channel += sum( + [x[-1] for x in voxel_sa_config.mlp_channels]) + else: + self.voxel_sa_layers = None + + if bev_feat_channel is not None and bev_scale_factor is not None: + self.bev_cfg = mmengine.Config( + dict( + bev_feat_channels=bev_feat_channel, + bev_scale_factor=bev_scale_factor)) + gathered_channel += bev_feat_channel + else: + self.bev_cfg = None + self.point_feature_fusion_layer = nn.Sequential( + ConvModule( + gathered_channel, + fused_out_channel, + kernel_size=(1, 1), + stride=(1, 1), + conv_cfg=dict(type='Conv2d'), + norm_cfg=norm_cfg, + bias=bias)) + + def interpolate_from_bev_features(self, keypoints: torch.Tensor, + bev_features: torch.Tensor, + batch_size: int, + bev_scale_factor: int) -> torch.Tensor: + """Gather key points features from bev feature map by interpolate. + + Args: + keypoints (torch.Tensor): Sampled key points with shape + (N1 + N2 + ..., NDim). + bev_features (torch.Tensor): Bev feature map from the first + stage with shape (B, C, H, W). + batch_size (int): Input batch size. + bev_scale_factor (int): Bev feature map scale factor. + + Returns: + torch.Tensor: Key points features gather from bev feature + map with shape (N1 + N2 + ..., C) + """ + x_idxs = (keypoints[..., 0] - + self.point_cloud_range[0]) / self.voxel_size[0] + y_idxs = (keypoints[..., 1] - + self.point_cloud_range[1]) / self.voxel_size[1] + + x_idxs = x_idxs / bev_scale_factor + y_idxs = y_idxs / bev_scale_factor + + point_bev_features_list = [] + for k in range(batch_size): + cur_x_idxs = x_idxs[k, ...] + cur_y_idxs = y_idxs[k, ...] + cur_bev_features = bev_features[k].permute(1, 2, 0) # (H, W, C) + point_bev_features = bilinear_interpolate_torch( + cur_bev_features, cur_x_idxs, cur_y_idxs) + point_bev_features_list.append(point_bev_features) + + point_bev_features = torch.cat( + point_bev_features_list, dim=0) # (N1 + N2 + ..., C) + return point_bev_features.view(batch_size, keypoints.shape[1], -1) + + def get_voxel_centers(self, coors: torch.Tensor, + scale_factor: float) -> torch.Tensor: + """Get voxel centers coordinate. + + Args: + coors (torch.Tensor): Coordinates of voxels shape is Nx(1+NDim), + where 1 represents the batch index. + scale_factor (float): Scale factor. + + Returns: + torch.Tensor: Voxel centers coordinate with shape (N, 3). 
+ """ + assert coors.shape[1] == 4 + voxel_centers = coors[:, [3, 2, 1]].float() # (xyz) + voxel_size = torch.tensor( + self.voxel_size, + device=voxel_centers.device).float() * scale_factor + pc_range = torch.tensor( + self.point_cloud_range[0:3], device=voxel_centers.device).float() + voxel_centers = (voxel_centers + 0.5) * voxel_size + pc_range + return voxel_centers + + def sample_key_points(self, points: List[torch.Tensor], + coors: torch.Tensor) -> torch.Tensor: + """Sample key points from raw points cloud. + + Args: + points (List[torch.Tensor]): Point cloud of each sample. + coors (torch.Tensor): Coordinates of voxels shape is Nx(1+NDim), + where 1 represents the batch index. + + Returns: + torch.Tensor: (B, M, 3) Key points of each sample. + M is num_keypoints. + """ + assert points is not None or coors is not None + if self.voxel_center_as_source: + _src_points = self.get_voxel_centers(coors=coors, scale_factor=1) + batch_size = coors[-1, 0].item() + 1 + src_points = [ + _src_points[coors[:, 0] == b] for b in range(batch_size) + ] + else: + src_points = [p[..., :3] for p in points] + + keypoints_list = [] + for points_to_sample in src_points: + num_points = points_to_sample.shape[0] + cur_pt_idxs = furthest_point_sample( + points_to_sample.unsqueeze(dim=0).contiguous(), + self.num_keypoints).long()[0] + + if num_points < self.num_keypoints: + times = int(self.num_keypoints / num_points) + 1 + non_empty = cur_pt_idxs[:num_points] + cur_pt_idxs = non_empty.repeat(times)[:self.num_keypoints] + + keypoints = points_to_sample[cur_pt_idxs] + + keypoints_list.append(keypoints) + keypoints = torch.stack(keypoints_list, dim=0) # (B, M, 3) + return keypoints + + def forward(self, batch_inputs_dict: dict, feats_dict: dict, + rpn_results_list: InstanceList) -> dict: + """Extract point-wise features from multi-input. + + Args: + batch_inputs_dict (dict): The model input dict which include + 'points', 'voxels' keys. + + - points (list[torch.Tensor]): Point cloud of each sample. + - voxels (dict[torch.Tensor]): Voxels of the batch sample. + feats_dict (dict): Contains features from the first + stage. + rpn_results_list (List[:obj:`InstanceData`]): Detection results + of rpn head. + + Returns: + dict: Contain Point-wise features, include: + - keypoints (torch.Tensor): Sampled key points. + - keypoint_features (torch.Tensor): Gathered key points + features from multi input. + - fusion_keypoint_features (torch.Tensor): Fusion + keypoint_features by point_feature_fusion_layer. 
+ """ + points = batch_inputs_dict['points'] + voxel_encode_features = feats_dict['multi_scale_3d_feats'] + bev_encode_features = feats_dict['spatial_feats'] + if self.voxel_center_as_source: + voxels_coors = batch_inputs_dict['voxels']['coors'] + else: + voxels_coors = None + keypoints = self.sample_key_points(points, voxels_coors) + + point_features_list = [] + batch_size = len(points) + + if self.bev_cfg is not None: + point_bev_features = self.interpolate_from_bev_features( + keypoints, bev_encode_features, batch_size, + self.bev_cfg.bev_scale_factor) + point_features_list.append(point_bev_features.contiguous()) + + batch_size, num_keypoints, _ = keypoints.shape + key_xyz = keypoints.view(-1, 3) + key_xyz_batch_cnt = key_xyz.new_zeros(batch_size).int().fill_( + num_keypoints) + + if self.rawpoints_sa_layer is not None: + batch_points = torch.cat(points, dim=0) + batch_cnt = [len(p) for p in points] + xyz = batch_points[:, :3].contiguous() + features = None + if batch_points.size(1) > 0: + features = batch_points[:, 3:].contiguous() + xyz_batch_cnt = xyz.new_tensor(batch_cnt, dtype=torch.int32) + + pooled_points, pooled_features = self.rawpoints_sa_layer( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=key_xyz.contiguous(), + new_xyz_batch_cnt=key_xyz_batch_cnt, + features=features.contiguous(), + ) + + point_features_list.append(pooled_features.contiguous().view( + batch_size, num_keypoints, -1)) + if self.voxel_sa_layers is not None: + for k, voxel_sa_layer in enumerate(self.voxel_sa_layers): + cur_coords = voxel_encode_features[k].coords # Changed to coords + xyz = self.get_voxel_centers( + coors=cur_coords, + scale_factor=self.voxel_sa_configs_list[k].scale_factor + ).contiguous() + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + pooled_points, pooled_features = voxel_sa_layer( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=key_xyz.contiguous(), + new_xyz_batch_cnt=key_xyz_batch_cnt, + features=voxel_encode_features[k].feats, # @Yingqi: changed from features to feats + ) + point_features_list.append(pooled_features.contiguous().view( + batch_size, num_keypoints, -1)) + + point_features = torch.cat( + point_features_list, dim=-1).view(batch_size * num_keypoints, -1, + 1) + + fusion_point_features = self.point_feature_fusion_layer( + point_features.unsqueeze(dim=-1)).squeeze(dim=-1) + + batch_idxs = torch.arange( + batch_size * num_keypoints, device=keypoints.device + ) // num_keypoints # batch indexes of each key points + batch_keypoints_xyz = torch.cat( + (batch_idxs.to(key_xyz.dtype).unsqueeze(dim=-1), key_xyz), dim=-1) + + return dict( + keypoint_features=point_features.squeeze(dim=-1), + fusion_keypoint_features=fusion_point_features.squeeze(dim=-1), + keypoints=batch_keypoints_xyz) diff --git a/examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/__init__.py b/examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/__init__.py new file mode 100644 index 0000000..4d397a7 --- /dev/null +++ b/examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/__init__.py @@ -0,0 +1 @@ +from .parta2_bbox_head import PartA2BboxHeadTS \ No newline at end of file diff --git a/examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/parta2_bbox_head.py b/examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/parta2_bbox_head.py new file mode 100644 index 0000000..86b053d --- /dev/null +++ 
b/examples/mmdetection3d/ts_plugin/models/roi_heads/bbox_heads/parta2_bbox_head.py @@ -0,0 +1,717 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, List, Tuple + +import numpy as np +import torch +from mmcv.cnn import ConvModule +from mmdet.models.utils import multi_apply +from mmengine.model import normal_init +from mmengine.structures import InstanceData +from torch import Tensor + +from ...layers.sparse_block import make_sparse_convmodule_ts +from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE +from mmdet3d.utils.typing_utils import InstanceList + +if IS_SPCONV2_AVAILABLE: + from spconv.pytorch import (SparseConvTensor, SparseMaxPool3d, + SparseSequential) +else: + from mmcv.ops import SparseConvTensor, SparseMaxPool3d, SparseSequential + +from mmengine.model import BaseModule +from torch import nn as nn + +from mmdet3d.models.layers import nms_bev, nms_normal_bev +from mmdet3d.registry import MODELS, TASK_UTILS +from mmdet3d.structures.bbox_3d import (LiDARInstance3DBoxes, + rotation_3d_in_axis, xywhr2xyxyr) +from mmdet3d.utils.typing_utils import SamplingResultList + +import torchsparse +from torchsparse import SparseTensor + +import os + +savepath = os.environ.get("PT_SAVE_PATH") + +class Flag(torch.nn.Module): + def __init__(self, msg) -> None: + super().__init__() + self.msg = msg + def forward(self, x): + print(f"Layer: {self.msg}") + return x + +class MaxPool3DWrap(torch.nn.Module): + def __init__(self, kernel_size, stride): + super().__init__() + self.kernel_size = kernel_size + self.stride = stride + # self.pool = nn.MaxPool3d(kernel_size, stride) + self.pool = SparseMaxPool3d(kernel_size, stride) + + def forward(self, tensor: SparseTensor): + sptensor = SparseConvTensor(tensor.feats, tensor.coords, tensor.spatial_range[1:4], tensor.spatial_range[0]) # tensor.spatial_range[0] + pooled = self.pool(sptensor) + spatial_range = (tensor.spatial_range[0],) + tuple(pooled.spatial_shape) # tensor.spatial_range[0] # 100 in testing + tstensor = SparseTensor(pooled.features, pooled.indices, spatial_range=spatial_range) + return tstensor + + # dense_tensor = tensor.dense() + # pooled = self.pool(dense_tensor) + + # batch_size = pooled.shape[0] + + # # transform to sparse tensors + # sparse_shape = pooled.shape[1:4] + + # # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx] + # sparse_idx = pooled.sum(dim=-1).nonzero(as_tuple=False) + + # pooled_1 = pooled[sparse_idx[:, 0], sparse_idx[:, 1], + # sparse_idx[:, 2], sparse_idx[:, 3]] + # coords = sparse_idx.int().contiguous() + # spatial_range = (coords[:, 0].max().item() + 1,) + tuple(sparse_shape) + # pooled_1 = torchsparse.SparseTensor(pooled_1, coords, spatial_range=spatial_range, batch_size=batch_size) + # return pooled_1 + +@MODELS.register_module("PartA2BboxHeadTS") +class PartA2BboxHeadTS(BaseModule): + """PartA2 RoI head. + + Args: + num_classes (int): The number of classes to prediction. + seg_in_channels (int): Input channels of segmentation + convolution layer. + part_in_channels (int): Input channels of part convolution layer. + seg_conv_channels (list(int)): Out channels of each + segmentation convolution layer. + part_conv_channels (list(int)): Out channels of each + part convolution layer. + merge_conv_channels (list(int)): Out channels of each + feature merged convolution layer. + down_conv_channels (list(int)): Out channels of each + downsampled convolution layer. + shared_fc_channels (list(int)): Out channels of each shared fc layer. 
+ cls_channels (list(int)): Out channels of each classification layer. + reg_channels (list(int)): Out channels of each regression layer. + dropout_ratio (float): Dropout ratio of classification and + regression layers. + roi_feat_size (int): The size of pooled roi features. + with_corner_loss (bool): Whether to use corner loss or not. + bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for box head. + conv_cfg (dict): Config dict of convolutional layers + norm_cfg (dict): Config dict of normalization layers + loss_bbox (dict): Config dict of box regression loss. + loss_cls (dict, optional): Config dict of classifacation loss. + """ + + def __init__(self, + num_classes: int, + seg_in_channels: int, + part_in_channels: int, + seg_conv_channels: List[int] = None, + part_conv_channels: List[int] = None, + merge_conv_channels: List[int] = None, + down_conv_channels: List[int] = None, + shared_fc_channels: List[int] = None, + cls_channels: List[int] = None, + reg_channels: List[int] = None, + dropout_ratio: float = 0.1, + roi_feat_size: int = 14, + with_corner_loss: bool = True, + bbox_coder: dict = dict(type='DeltaXYZWLHRBBoxCoder'), + conv_cfg: dict = dict(type='Conv1d'), + norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01), + loss_bbox: dict = dict( + type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), + loss_cls: dict = dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='none', + loss_weight=1.0), + init_cfg: dict = None) -> None: + super(PartA2BboxHeadTS, self).__init__(init_cfg=init_cfg) + self.num_classes = num_classes + self.with_corner_loss = with_corner_loss + self.bbox_coder = TASK_UTILS.build(bbox_coder) + self.loss_bbox = MODELS.build(loss_bbox) + self.loss_cls = MODELS.build(loss_cls) + self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) + + sparse_norm_cfg = dict(type='TorchSparseBatchNorm', eps=1e-3, momentum=0.01) + + assert down_conv_channels[-1] == shared_fc_channels[0] + + # init layers + part_channel_last = part_in_channels + part_conv = [] + for i, channel in enumerate(part_conv_channels): + part_conv.append( + make_sparse_convmodule_ts( + part_channel_last, + channel, + 3, + padding=1, + norm_cfg=sparse_norm_cfg, + # indice_key=f'rcnn_part{i}', # @Yingqi: commented it out + conv_type='TorchSparseConv3d')) # @Yingqi: changed to TorchSparseConv3d + part_channel_last = channel + self.part_conv = nn.Sequential(*part_conv) + + seg_channel_last = seg_in_channels + seg_conv = [] + for i, channel in enumerate(seg_conv_channels): + seg_conv.append( + make_sparse_convmodule_ts( + seg_channel_last, + channel, + 3, + padding=1, + norm_cfg=sparse_norm_cfg, + # indice_key=f'rcnn_seg{i}', + conv_type='TorchSparseConv3d')) + seg_channel_last = channel + self.seg_conv = nn.Sequential(*seg_conv) + + self.conv_down = nn.Sequential() + + merge_conv_channel_last = part_channel_last + seg_channel_last + merge_conv = [] + for i, channel in enumerate(merge_conv_channels): + merge_conv.append( + make_sparse_convmodule_ts( + merge_conv_channel_last, + channel, + 3, + padding=1, + norm_cfg=sparse_norm_cfg)) + # indice_key='rcnn_down0')) + merge_conv_channel_last = channel + + down_conv_channel_last = merge_conv_channel_last + conv_down = [] + for i, channel in enumerate(down_conv_channels): + conv_down.append( + make_sparse_convmodule_ts( + down_conv_channel_last, + channel, + 3, + padding=1, + norm_cfg=sparse_norm_cfg)) + # indice_key='rcnn_down1')) + down_conv_channel_last = channel + # print(self) + + #self.conv_down.add_module("flag_before_merge_conv", Flag("before 
merge conv")) + self.conv_down.add_module('merge_conv', nn.Sequential(*merge_conv)) + #self.conv_down.add_module("flag_after_merge_conv", Flag("after merge conv")) + self.conv_down.add_module('max_pool3d', MaxPool3DWrap(kernel_size=2, stride=2)) + #self.conv_down.add_module("flag_after_max_pool3d", Flag("after max pool3d")) + self.conv_down.add_module('down_conv', nn.Sequential(*conv_down)) + #self.conv_down.add_module("flag_after_down_conv", Flag("after down conv")) + + shared_fc_list = [] + pool_size = roi_feat_size // 2 + pre_channel = shared_fc_channels[0] * pool_size**3 + for k in range(1, len(shared_fc_channels)): + shared_fc_list.append( + ConvModule( + pre_channel, + shared_fc_channels[k], + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + inplace=True)) + pre_channel = shared_fc_channels[k] + + if k != len(shared_fc_channels) - 1 and dropout_ratio > 0: + shared_fc_list.append(nn.Dropout(dropout_ratio)) + + self.shared_fc = nn.Sequential(*shared_fc_list) + + # Classification layer + channel_in = shared_fc_channels[-1] + cls_channel = 1 + cls_layers = [] + pre_channel = channel_in + for k in range(0, len(cls_channels)): + cls_layers.append( + ConvModule( + pre_channel, + cls_channels[k], + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + inplace=True)) + pre_channel = cls_channels[k] + cls_layers.append( + ConvModule( + pre_channel, + cls_channel, + 1, + padding=0, + conv_cfg=conv_cfg, + act_cfg=None)) + if dropout_ratio >= 0: + cls_layers.insert(1, nn.Dropout(dropout_ratio)) + + self.conv_cls = nn.Sequential(*cls_layers) + + # Regression layer + reg_layers = [] + pre_channel = channel_in + for k in range(0, len(reg_channels)): + reg_layers.append( + ConvModule( + pre_channel, + reg_channels[k], + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + inplace=True)) + pre_channel = reg_channels[k] + reg_layers.append( + ConvModule( + pre_channel, + self.bbox_coder.code_size, + 1, + padding=0, + conv_cfg=conv_cfg, + act_cfg=None)) + if dropout_ratio >= 0: + reg_layers.insert(1, nn.Dropout(dropout_ratio)) + + self.conv_reg = nn.Sequential(*reg_layers) + + if init_cfg is None: + self.init_cfg = dict( + type='Xavier', + layer=['Conv2d', 'Conv1d'], + distribution='uniform') + print(self.conv_down) + pass + + def init_weights(self): + super().init_weights() + normal_init(self.conv_reg[-1].conv, mean=0, std=0.001) + + def forward(self, seg_feats: Tensor, part_feats: Tensor) -> Tuple[Tensor]: + """Forward pass. + + Args: + seg_feats (torch.Tensor): Point-wise semantic features. + part_feats (torch.Tensor): Point-wise part prediction features. + + Returns: + tuple[torch.Tensor]: Score of class and bbox predictions. + """ + # (B * N, out_x, out_y, out_z, 4) + rcnn_batch_size = part_feats.shape[0] + + # transform to sparse tensors + sparse_shape = part_feats.shape[1:4] + # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx] + # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx] + sparse_idx = part_feats.sum(dim=-1).nonzero(as_tuple=False) + + part_features = part_feats[sparse_idx[:, 0], sparse_idx[:, 1], + sparse_idx[:, 2], sparse_idx[:, 3]] + seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1], + sparse_idx[:, 2], sparse_idx[:, 3]] + coords = sparse_idx.int().contiguous() + spatial_range = (rcnn_batch_size,) + tuple(sparse_shape) # first index become 99??? 
# coords[:, 0].max().item() + 1 + part_features = torchsparse.SparseTensor(part_features, coords, spatial_range=spatial_range) + seg_features = torchsparse.SparseTensor(seg_features, coords, spatial_range=spatial_range) + + # forward rcnn network + x_part = self.part_conv(part_features) + x_rpn = self.seg_conv(seg_features) + + merged_feature = torch.cat((x_rpn.feats, x_part.feats), + dim=1) # (N, C) + shared_feature = torchsparse.SparseTensor(merged_feature, coords, spatial_range=spatial_range) # spatial_range = spatial_range # (100,14,14,14) + # spatial range becomes 99 above + + x = self.conv_down(shared_feature) + + shared_feature = x.dense() # rcnn_batch_size # shared_feature.spatial_range[0] + + N, D, H, W, C = shared_feature.shape + shared_feature = shared_feature.permute(0,4,1,2,3).contiguous() + shared_feature = shared_feature.view(N, C * D, H, W) + shared_feature = shared_feature.view(rcnn_batch_size, -1, 1) + + shared_feature = self.shared_fc(shared_feature) + + cls_score = self.conv_cls(shared_feature).transpose( + 1, 2).contiguous().squeeze(dim=1) # (B, 1) + bbox_pred = self.conv_reg(shared_feature).transpose( + 1, 2).contiguous().squeeze(dim=1) # (B, C) + return cls_score, bbox_pred + + def loss(self, cls_score: Tensor, bbox_pred: Tensor, rois: Tensor, + labels: Tensor, bbox_targets: Tensor, pos_gt_bboxes: Tensor, + reg_mask: Tensor, label_weights: Tensor, + bbox_weights: Tensor) -> Dict: + """Computing losses. + + Args: + cls_score (torch.Tensor): Scores of each roi. + bbox_pred (torch.Tensor): Predictions of bboxes. + rois (torch.Tensor): Roi bboxes. + labels (torch.Tensor): Labels of class. + bbox_targets (torch.Tensor): Target of positive bboxes. + pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes. + reg_mask (torch.Tensor): Mask for positive bboxes. + label_weights (torch.Tensor): Weights of class loss. + bbox_weights (torch.Tensor): Weights of bbox loss. + + Returns: + dict: Computed losses. + + - loss_cls (torch.Tensor): Loss of classes. + - loss_bbox (torch.Tensor): Loss of bboxes. + - loss_corner (torch.Tensor): Loss of corners. 
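+
+ Example (illustrative sketch of the returned keys; assumes
+ ``with_corner_loss=True``, the default):
+ >>> losses = self.loss(cls_score, bbox_pred, rois, labels,
+ ...                    bbox_targets, pos_gt_bboxes, reg_mask,
+ ...                    label_weights, bbox_weights)
+ >>> sorted(losses.keys())
+ ['loss_bbox', 'loss_cls', 'loss_corner']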
+ """ + losses = dict() + rcnn_batch_size = cls_score.shape[0] + + # calculate class loss + cls_flat = cls_score.view(-1) + loss_cls = self.loss_cls(cls_flat, labels, label_weights) + losses['loss_cls'] = loss_cls + + # calculate regression loss + code_size = self.bbox_coder.code_size + pos_inds = (reg_mask > 0) + if pos_inds.any() == 0: + # fake a part loss + losses['loss_bbox'] = loss_cls.new_tensor(0) * loss_cls.sum() + if self.with_corner_loss: + losses['loss_corner'] = loss_cls.new_tensor(0) * loss_cls.sum() + else: + pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds] + bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat( + 1, pos_bbox_pred.shape[-1]) + loss_bbox = self.loss_bbox( + pos_bbox_pred.unsqueeze(dim=0), bbox_targets.unsqueeze(dim=0), + bbox_weights_flat.unsqueeze(dim=0)) + losses['loss_bbox'] = loss_bbox + + if self.with_corner_loss: + pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds] + pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size) + batch_anchors = pos_roi_boxes3d.clone().detach() + pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1) + roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3) + batch_anchors[..., 0:3] = 0 + # decode boxes + pred_boxes3d = self.bbox_coder.decode( + batch_anchors, + pos_bbox_pred.view(-1, code_size)).view(-1, code_size) + + pred_boxes3d[..., 0:3] = rotation_3d_in_axis( + pred_boxes3d[..., 0:3].unsqueeze(1), + pos_rois_rotation, + axis=2).squeeze(1) + + pred_boxes3d[:, 0:3] += roi_xyz + + # calculate corner loss + loss_corner = self.get_corner_loss_lidar( + pred_boxes3d, pos_gt_bboxes) + losses['loss_corner'] = loss_corner + + return losses + + def get_targets(self, + sampling_results: SamplingResultList, + rcnn_train_cfg: dict, + concat: bool = True) -> Tuple[Tensor]: + """Generate targets. + + Args: + sampling_results (list[:obj:`SamplingResult`]): + Sampled results from rois. + rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn. + concat (bool): Whether to concatenate targets between batches. + + Returns: + tuple[torch.Tensor]: Targets of boxes and class prediction. + """ + pos_bboxes_list = [res.pos_bboxes for res in sampling_results] + pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results] + iou_list = [res.iou for res in sampling_results] + targets = multi_apply( + self._get_target_single, + pos_bboxes_list, + pos_gt_bboxes_list, + iou_list, + cfg=rcnn_train_cfg) + + (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, + bbox_weights) = targets + + if concat: + label = torch.cat(label, 0) + bbox_targets = torch.cat(bbox_targets, 0) + pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0) + reg_mask = torch.cat(reg_mask, 0) + + label_weights = torch.cat(label_weights, 0) + label_weights /= torch.clamp(label_weights.sum(), min=1.0) + + bbox_weights = torch.cat(bbox_weights, 0) + bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0) + + return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, + bbox_weights) + + def _get_target_single(self, pos_bboxes: Tensor, pos_gt_bboxes: Tensor, + ious: Tensor, cfg: dict) -> Tuple[Tensor]: + """Generate training targets for a single sample. + + Args: + pos_bboxes (torch.Tensor): Positive boxes with shape + (N, 7). + pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape + (M, 7). + ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes` + in shape (N, M). + cfg (dict): Training configs. + + Returns: + tuple[torch.Tensor]: Target for positive boxes. 
+ (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, + bbox_weights) + """ + cls_pos_mask = ious > cfg.cls_pos_thr + cls_neg_mask = ious < cfg.cls_neg_thr + interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0) + + # iou regression target + label = (cls_pos_mask > 0).float() + label[interval_mask] = ious[interval_mask] * 2 - 0.5 + # label weights + label_weights = (label >= 0).float() + + # box regression target + reg_mask = pos_bboxes.new_zeros(ious.size(0)).long() + reg_mask[0:pos_gt_bboxes.size(0)] = 1 + bbox_weights = (reg_mask > 0).float() + if reg_mask.bool().any(): + pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach() + roi_center = pos_bboxes[..., 0:3] + roi_ry = pos_bboxes[..., 6] % (2 * np.pi) + + # canonical transformation + pos_gt_bboxes_ct[..., 0:3] -= roi_center + pos_gt_bboxes_ct[..., 6] -= roi_ry + pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis( + pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry, + axis=2).squeeze(1) + + # flip orientation if rois have opposite orientation + ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5) + ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % ( + 2 * np.pi) # (0 ~ pi/2, 3pi/2 ~ 2pi) + flag = ry_label > np.pi + ry_label[flag] = ry_label[flag] - np.pi * 2 # (-pi/2, pi/2) + ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2) + pos_gt_bboxes_ct[..., 6] = ry_label + + rois_anchor = pos_bboxes.clone().detach() + rois_anchor[:, 0:3] = 0 + rois_anchor[:, 6] = 0 + bbox_targets = self.bbox_coder.encode(rois_anchor, + pos_gt_bboxes_ct) + else: + # no fg bbox + bbox_targets = pos_gt_bboxes.new_empty((0, 7)) + + return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, + bbox_weights) + + def get_corner_loss_lidar(self, + pred_bbox3d: Tensor, + gt_bbox3d: Tensor, + delta: float = 1.0) -> Tensor: + """Calculate corner loss of given boxes. + + Args: + pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7). + gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7). + delta (float, optional): huber loss threshold. Defaults to 1.0 + + Returns: + torch.FloatTensor: Calculated corner loss in shape (N). + """ + assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0] + + # This is a little bit hack here because we assume the box for + # Part-A2 is in LiDAR coordinates + gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d) + pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners + gt_box_corners = gt_boxes_structure.corners + + # This flip only changes the heading direction of GT boxes + gt_bbox3d_flip = gt_boxes_structure.clone() + gt_bbox3d_flip.tensor[:, 6] += np.pi + gt_box_corners_flip = gt_bbox3d_flip.corners + + corner_dist = torch.min( + torch.norm(pred_box_corners - gt_box_corners, dim=2), + torch.norm(pred_box_corners - gt_box_corners_flip, + dim=2)) # (N, 8) + # huber loss + abs_error = corner_dist.abs() + quadratic = abs_error.clamp(max=delta) + linear = (abs_error - quadratic) + corner_loss = 0.5 * quadratic**2 + delta * linear + + return corner_loss.mean(dim=1) + + def get_results(self, + rois: Tensor, + cls_score: Tensor, + bbox_pred: Tensor, + class_labels: Tensor, + class_pred: Tensor, + input_metas: List[dict], + cfg: dict = None) -> InstanceList: + """Generate bboxes from bbox head predictions. + + Args: + rois (torch.Tensor): Roi bounding boxes. + cls_score (torch.Tensor): Scores of bounding boxes. 
+ bbox_pred (torch.Tensor): Bounding boxes predictions + class_labels (torch.Tensor): Label of classes + class_pred (torch.Tensor): Score for nms. + input_metas (list[dict]): Point cloud and image's meta info. + cfg (:obj:`ConfigDict`): Testing config. + + Returns: + list[:obj:`InstanceData`]: Detection results of each sample + after the post process. + Each item usually contains following keys. + + - scores_3d (Tensor): Classification scores, has a shape + (num_instances, ) + - labels_3d (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes, + contains a tensor with shape (num_instances, C), where + C >= 7. + """ + roi_batch_id = rois[..., 0] + roi_boxes = rois[..., 1:] # boxes without batch id + batch_size = int(roi_batch_id.max().item() + 1) + + # decode boxes + roi_ry = roi_boxes[..., 6].view(-1) + roi_xyz = roi_boxes[..., 0:3].view(-1, 3) + local_roi_boxes = roi_boxes.clone().detach() + local_roi_boxes[..., 0:3] = 0 + rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred) + rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis( + rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1) + rcnn_boxes3d[:, 0:3] += roi_xyz + + # post processing + result_list = [] + for batch_id in range(batch_size): + cur_class_labels = class_labels[batch_id] + cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1) + + cur_box_prob = class_pred[batch_id] + cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id] + keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d, + cfg.score_thr, cfg.nms_thr, + input_metas[batch_id], + cfg.use_rotate_nms) + selected_bboxes = cur_rcnn_boxes3d[keep] + selected_label_preds = cur_class_labels[keep] + selected_scores = cur_cls_score[keep] + + results = InstanceData() + results.bboxes_3d = input_metas[batch_id]['box_type_3d']( + selected_bboxes, self.bbox_coder.code_size) + results.scores_3d = selected_scores + results.labels_3d = selected_label_preds + + result_list.append(results) + return result_list + + def multi_class_nms(self, + box_probs: Tensor, + box_preds: Tensor, + score_thr: float, + nms_thr: float, + input_meta: dict, + use_rotate_nms: bool = True) -> Tensor: + """Multi-class NMS for box head. + + Note: + This function has large overlap with the `box3d_multiclass_nms` + implemented in `mmdet3d.core.post_processing`. We are considering + merging these two functions in the future. + + Args: + box_probs (torch.Tensor): Predicted boxes probabitilies in + shape (N,). + box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C). + score_thr (float): Threshold of scores. + nms_thr (float): Threshold for NMS. + input_meta (dict): Meta information of the current sample. + use_rotate_nms (bool, optional): Whether to use rotated nms. + Defaults to True. + + Returns: + torch.Tensor: Selected indices. 
+ """ + if use_rotate_nms: + nms_func = nms_bev + else: + nms_func = nms_normal_bev + + assert box_probs.shape[ + 1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}' + selected_list = [] + selected_labels = [] + boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d']( + box_preds, self.bbox_coder.code_size).bev) + + score_thresh = score_thr if isinstance( + score_thr, list) else [score_thr for x in range(self.num_classes)] + nms_thresh = nms_thr if isinstance( + nms_thr, list) else [nms_thr for x in range(self.num_classes)] + for k in range(0, self.num_classes): + class_scores_keep = box_probs[:, k] >= score_thresh[k] + + if class_scores_keep.int().sum() > 0: + original_idxs = class_scores_keep.nonzero( + as_tuple=False).view(-1) + cur_boxes_for_nms = boxes_for_nms[class_scores_keep] + cur_rank_scores = box_probs[class_scores_keep, k] + + cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores, + nms_thresh[k]) + + if cur_selected.shape[0] == 0: + continue + selected_list.append(original_idxs[cur_selected]) + selected_labels.append( + torch.full([cur_selected.shape[0]], + k + 1, + dtype=torch.int64, + device=box_preds.device)) + + keep = torch.cat( + selected_list, dim=0) if len(selected_list) > 0 else [] + return keep diff --git a/examples/openpcdet/README.md b/examples/openpcdet/README.md new file mode 100644 index 0000000..9c81f4a --- /dev/null +++ b/examples/openpcdet/README.md @@ -0,0 +1,66 @@ +# TorchSparse for OpenPCDet Plugin Demo + +This tutorial demonstrates how to evaluate TorchSparse integrated OpenPCDet models. Follow the steps below to install dependencies, configure paths, convert model weights, and run the demo. + +## Dependencies + +1. **Conda**: Ensure Conda is installed. +2. **OpenPCDet Installation**: Follow the [OpenPCDet documentation](https://github.com/open-mmlab/OpenPCDet/blob/master/docs/INSTALL.md). +3. **Dataset Preparation**: Pre-process the datasets as described [here](https://github.com/open-mmlab/OpenPCDet/blob/master/docs/GETTING_STARTED.md). +4. **TorchSparse Installation**: Install [TorchSparse](https://github.com/mit-han-lab/torchsparse). +5. **Install TorchSparse Plugin for OpenPCDet**: + 1. Clone this repository. + 2. Define the environment variable `PCDET_BASE` to point to the installation path of OpenPCDet. + 3. Navigate to `examples/openpcdet` and run `pip install -v -e .`. + +## Notes + +- You may need to disable PyTorch JIT compile to avoid errors. Add the following to the import section of the relevant `.py` file: + ```python + import torch + torch.jit._state.disable() + ``` +- Modify dataset paths in the model config to absolute paths to avoid `FileNotFoundError`. + +## Steps + +1. Install the dependencies. +2. Specify the base paths and model registry. +3. **IMPORTANT,** Activate the plugin: In `OpenPCDet/tools/test.py`, add `import pcdet_plugin` as the last import statement to activate the plugin. +4. Run the evaluation. + +## Supported Models + +- Kitti: SECOND, PV-RCNN, Part-A2 +- NuScenes: VoxelNeXt + +## Load the Weight Conversion Module +The dimensions of TorchSparse differ from SpConv, so parameter dimension conversion is required. You can use `convert_weights_cmd()` in converter.py as a command line tool or use `convert_weights()` as an API. Both functions have four parameters: + +1. `ckpt_before`: Path to the input SpConv checkpoint file. +2. `ckpt_after`: Path where the converted TorchSparse checkpoint will be saved. +3. `cfg_path`: Path to the configuration mmdet3d file of the model. +4. 
`v_spconv`: Version of SpConv used in the original model (1 or 2).
+5. `framework`: Choose between `'openpc'` and `'mmdet3d'`; defaults to `'mmdet3d'`.
+
+These parameters allow the converter to locate the input model, specify the output location, understand the model's architecture, and apply the appropriate conversion method to the specific sparse convolution layers.
+
+Example conversion command:
+```bash
+python examples/converter.py --ckpt_before ../OpenPCDet/models/SECOND/second_7862.pth --cfg_path ../OpenPCDet/tools/cfgs/kitti_models/second.yaml --ckpt_after ./converted/SECOND/second_7862.pth --v_spconv 1 --framework openpc
+```
+
+
+## Run the Evaluation
+In your Conda environment with all the dependencies installed, run the following for the evaluation:
+```bash
+python <test_file_path> --cfg_file <torchsparse_cfg_path> --ckpt <torchsparse_model_path>
+```
+
+- `test_file_path`: the evaluation script in OpenPCDet (`tools/test.py`).
+- `torchsparse_cfg_path`: the config file of the model, in the `examples/openpcdet/cfgs` folder of this repository.
+- `torchsparse_model_path`: the converted TorchSparse checkpoint.
+
+
+### VoxelNeXt
+VoxelNeXt requires `examples/openpcdet/converter_voxelnext.py` as a model converter, rather than the general `converter.py`.
diff --git a/examples/openpcdet/cfgs_templates/kitti_models/PartA2_plugin.yaml b/examples/openpcdet/cfgs_templates/kitti_models/PartA2_plugin.yaml
new file mode 100644
index 0000000..28dbe7c
--- /dev/null
+++ b/examples/openpcdet/cfgs_templates/kitti_models/PartA2_plugin.yaml
@@ -0,0 +1,191 @@
+CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
+
+DATA_CONFIG:
+    _BASE_CONFIG_: {{ pcdet_base_path }}/tools/cfgs/dataset_configs/kitti_dataset.yaml
+
+
+MODEL:
+    NAME: PartA2Net
+
+    VFE:
+        NAME: MeanVFE
+
+    BACKBONE_3D:
+        NAME: UNetV2TS
+
+    MAP_TO_BEV:
+        NAME: HeightCompressionTS
+        NUM_BEV_FEATURES: 256
+
+    BACKBONE_2D:
+        NAME: BaseBEVBackbone
+
+        LAYER_NUMS: [5, 5]
+        LAYER_STRIDES: [1, 2]
+        NUM_FILTERS: [128, 256]
+        UPSAMPLE_STRIDES: [1, 2]
+        NUM_UPSAMPLE_FILTERS: [256, 256]
+
+    DENSE_HEAD:
+        NAME: AnchorHeadSingle
+        CLASS_AGNOSTIC: False
+
+        USE_DIRECTION_CLASSIFIER: True
+        DIR_OFFSET: 0.78539
+        DIR_LIMIT_OFFSET: 0.0
+        NUM_DIR_BINS: 2
+
+        ANCHOR_GENERATOR_CONFIG: [
+            {
+                'class_name': 'Car',
+                'anchor_sizes': [[3.9, 1.6, 1.56]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-1.78],
+                'align_center': False,
+                'feature_map_stride': 8,
+                'matched_threshold': 0.6,
+                'unmatched_threshold': 0.45
+            },
+            {
+                'class_name': 'Pedestrian',
+                'anchor_sizes': [[0.8, 0.6, 1.73]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-1.78],
+                'align_center': False,
+                'feature_map_stride': 8,
+                'matched_threshold': 0.5,
+                'unmatched_threshold': 0.35
+            },
+            {
+                'class_name': 'Cyclist',
+                'anchor_sizes': [[1.76, 0.6, 1.73]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-1.78],
+                'align_center': False,
+                'feature_map_stride': 8,
+                'matched_threshold': 0.5,
+                'unmatched_threshold': 0.35
+            }
+        ]
+
+        TARGET_ASSIGNER_CONFIG:
+            NAME: AxisAlignedTargetAssigner
+            POS_FRACTION: -1.0
+            SAMPLE_SIZE: 512
+            NORM_BY_NUM_EXAMPLES: False
+            MATCH_HEIGHT: False
+            BOX_CODER: ResidualCoder
+
+        LOSS_CONFIG:
+            LOSS_WEIGHTS: {
+                'cls_weight': 1.0,
+                'loc_weight': 2.0,
+                'dir_weight': 0.2,
+                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            }
+
+    POINT_HEAD:
+        NAME: PointIntraPartOffsetHead
+        CLS_FC: []
+        PART_FC: []
+        CLASS_AGNOSTIC: True
+        TARGET_CONFIG:
+            GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
+        LOSS_CONFIG:
+            LOSS_REG: smooth-l1
+            LOSS_WEIGHTS: {
+                'point_cls_weight': 1.0,
+                'point_part_weight': 1.0
+            }
+
+    ROI_HEAD:
+        NAME: PartA2FCHeadTS
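+        # Note: modules with the "TS" suffix in this config (UNetV2TS, HeightCompressionTS,
+        # PartA2FCHeadTS) are the TorchSparse-backed replacements that become available
+        # once the `pcdet_plugin` package is imported (see the README steps above).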
+ CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + SEG_MASK_SCORE_THRESH: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_AWARE_POOL: + POOL_SIZE: 12 + NUM_FEATURES: 128 + MAX_POINTS_PER_VOXEL: 128 + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.65 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 4 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/examples/openpcdet/cfgs_templates/kitti_models/pv_rcnn_plugin.yaml b/examples/openpcdet/cfgs_templates/kitti_models/pv_rcnn_plugin.yaml new file mode 100644 index 0000000..0a6c060 --- /dev/null +++ b/examples/openpcdet/cfgs_templates/kitti_models/pv_rcnn_plugin.yaml @@ -0,0 +1,249 @@ +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: {{ pcdet_base_path }}/tools/cfgs/dataset_configs/kitti_dataset.yaml # This should points to the path of the original dataset config file in OpenPCDet Repo + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: False + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15','Pedestrian:10', 'Cyclist:10'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNN + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8xTS + + MAP_TO_BEV: + NAME: HeightCompressionTS + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + 
}, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.8, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.76, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstractionTS + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git 
a/examples/openpcdet/cfgs_templates/kitti_models/second_plugin.yaml b/examples/openpcdet/cfgs_templates/kitti_models/second_plugin.yaml new file mode 100644 index 0000000..b0d2838 --- /dev/null +++ b/examples/openpcdet/cfgs_templates/kitti_models/second_plugin.yaml @@ -0,0 +1,121 @@ +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: {{ pcdet_base_path }}/tools/cfgs/dataset_configs/kitti_dataset.yaml + + +MODEL: + NAME: SECONDNet + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8xTS + + MAP_TO_BEV: + NAME: HeightCompressionTS + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.8, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.76, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.01 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 4 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.003 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/examples/openpcdet/cfgs_templates/kitti_models/voxel_rcnn_car_plugin.yaml b/examples/openpcdet/cfgs_templates/kitti_models/voxel_rcnn_car_plugin.yaml new file mode 100644 index 0000000..8bde6cd --- /dev/null +++ b/examples/openpcdet/cfgs_templates/kitti_models/voxel_rcnn_car_plugin.yaml @@ -0,0 +1,202 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: {{ pcdet_base_path }}/tools/cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: 
random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: VoxelRCNN + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8xTS + + MAP_TO_BEV: + NAME: HeightCompressionTS + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [64, 128] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [128, 128] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + ROI_HEAD: + NAME: VoxelRCNNHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + USE_FAST_NMS: False + SCORE_THRESH: 0.0 + NMS_PRE_MAXSIZE: 2048 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4'] + PRE_MLP: True + GRID_SIZE: 6 + POOL_LAYERS: + x_conv2: + MLPS: [[32, 32]] + QUERY_RANGES: [[4, 4, 4]] + POOL_RADIUS: [0.4] + NSAMPLE: [16] + POOL_METHOD: max_pool + x_conv3: + MLPS: [[32, 32]] + QUERY_RANGES: [[4, 4, 4]] + POOL_RADIUS: [0.8] + NSAMPLE: [16] + POOL_METHOD: max_pool + x_conv4: + MLPS: [[32, 32]] + QUERY_RANGES: [[4, 4, 4]] + POOL_RADIUS: [1.6] + NSAMPLE: [16] + POOL_METHOD: max_pool + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + GRID_3D_IOU_LOSS: False + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'rcnn_iou3d_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.3 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/examples/openpcdet/cfgs_templates/nuscenes_models/cbgs_voxel0075_voxelnext.yaml b/examples/openpcdet/cfgs_templates/nuscenes_models/cbgs_voxel0075_voxelnext.yaml new file mode 100644 index 0000000..b0add62 --- 
/dev/null +++ b/examples/openpcdet/cfgs_templates/nuscenes_models/cbgs_voxel0075_voxelnext.yaml @@ -0,0 +1,156 @@ +CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer', + 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'] + +DATA_CONFIG: + _BASE_CONFIG_: {{ pcdet_base_path }}/tools/cfgs/dataset_configs/nuscenes_dataset.yaml + POINT_CLOUD_RANGE: [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0] + INFO_PATH: { + 'train': [nuscenes_infos_10sweeps_train.pkl], + 'test': [nuscenes_infos_10sweeps_val.pkl], + } + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + DB_INFO_PATH: + - nuscenes_dbinfos_10sweeps_withvelo.pkl + USE_SHARED_MEMORY: False #True # set it to True to speed up (it costs about 15GB shared memory) + DB_DATA_PATH: + - nuscenes_dbinfos_10sweeps_withvelo_global.pkl.npy + PREPARE: { + filter_by_min_points: [ + 'car:5','truck:5', 'construction_vehicle:5', 'bus:5', 'trailer:5', + 'barrier:5', 'motorcycle:5', 'bicycle:5', 'pedestrian:5', 'traffic_cone:5' + ], + } + + SAMPLE_GROUPS: [ + 'car:2','truck:2', 'construction_vehicle:2', 'bus:2', 'trailer:2', + 'barrier:2', 'motorcycle:2', 'bicycle:2', 'pedestrian:2', 'traffic_cone:2' + ] + + NUM_POINT_FEATURES: 5 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: True + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x', 'y'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.9, 1.1] + + - NAME: random_world_translation + NOISE_TRANSLATE_STD: [0.5, 0.5, 0.5] + + + DATA_PROCESSOR: + - NAME: mask_points_and_boxes_outside_range + REMOVE_OUTSIDE_BOXES: True + + - NAME: shuffle_points + SHUFFLE_ENABLED: { + 'train': True, + 'test': True + } + + - NAME: transform_points_to_voxels + VOXEL_SIZE: [0.075, 0.075, 0.2] + MAX_POINTS_PER_VOXEL: 10 + MAX_NUMBER_OF_VOXELS: { + 'train': 120000, + 'test': 160000 + } + + +MODEL: + NAME: VoxelNeXt + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelResBackBone8xVoxelNeXtTS + + DENSE_HEAD: + NAME: VoxelNeXtHeadTS + CLASS_AGNOSTIC: False + INPUT_FEATURES: 128 + + CLASS_NAMES_EACH_HEAD: [ + ['car'], + ['truck', 'construction_vehicle'], + ['bus', 'trailer'], + ['barrier'], + ['motorcycle', 'bicycle'], + ['pedestrian', 'traffic_cone'], + ] + + SHARED_CONV_CHANNEL: 128 + KERNEL_SIZE_HEAD: 1 + + USE_BIAS_BEFORE_NORM: True + NUM_HM_CONV: 2 + SEPARATE_HEAD_CFG: + HEAD_ORDER: ['center', 'center_z', 'dim', 'rot', 'vel'] + HEAD_DICT: { + 'center': {'out_channels': 2, 'num_conv': 2}, + 'center_z': {'out_channels': 1, 'num_conv': 2}, + 'dim': {'out_channels': 3, 'num_conv': 2}, + 'rot': {'out_channels': 2, 'num_conv': 2}, + 'vel': {'out_channels': 2, 'num_conv': 2}, + } + + TARGET_ASSIGNER_CONFIG: + FEATURE_MAP_STRIDE: 8 + NUM_MAX_OBJS: 500 + GAUSSIAN_OVERLAP: 0.1 + MIN_RADIUS: 2 + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 0.25, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0] + } + + POST_PROCESSING: + SCORE_THRESH: 0.1 + POST_CENTER_LIMIT_RANGE: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0] + MAX_OBJ_PER_SAMPLE: 500 + NMS_CONFIG: + NMS_TYPE: nms_gpu + NMS_THRESH: 0.2 + NMS_PRE_MAXSIZE: 1000 + NMS_POST_MAXSIZE: 83 + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + + EVAL_METRIC: kitti + + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 4 + NUM_EPOCHS: 20 + + OPTIMIZER: adam_onecycle + LR: 0.001 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + 
DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/examples/openpcdet/converted_models/README.md b/examples/openpcdet/converted_models/README.md new file mode 100644 index 0000000..97d7e95 --- /dev/null +++ b/examples/openpcdet/converted_models/README.md @@ -0,0 +1 @@ +Default model conversion base folder for the demo. Please create the relative path to each specific model under this directory. diff --git a/examples/openpcdet/converter_voxelnext.py b/examples/openpcdet/converter_voxelnext.py new file mode 100644 index 0000000..dcc8907 --- /dev/null +++ b/examples/openpcdet/converter_voxelnext.py @@ -0,0 +1,241 @@ +import torch +import re +import numpy as np + +conv_weights = [ + "backbone_3d.conv_input.0.weight", + "backbone_3d.conv1.0.conv1.weight", + "backbone_3d.conv1.0.conv2.weight", + "backbone_3d.conv1.1.conv1.weight", + "backbone_3d.conv1.1.conv2.weight", + "backbone_3d.conv2.0.0.weight", + "backbone_3d.conv2.1.conv1.weight", + "backbone_3d.conv2.1.conv2.weight", + "backbone_3d.conv2.2.conv1.weight", + "backbone_3d.conv2.2.conv2.weight", + "backbone_3d.conv3.0.0.weight", + "backbone_3d.conv3.1.conv1.weight", + "backbone_3d.conv3.1.conv2.weight", + "backbone_3d.conv3.2.conv1.weight", + "backbone_3d.conv3.2.conv2.weight", + "backbone_3d.conv4.0.0.weight", + "backbone_3d.conv4.1.conv1.weight", + "backbone_3d.conv4.1.conv2.weight", + "backbone_3d.conv4.2.conv1.weight", + "backbone_3d.conv4.2.conv2.weight", + "backbone_3d.conv5.0.0.weight", + "backbone_3d.conv5.1.conv1.weight", + "backbone_3d.conv5.1.conv2.weight", + "backbone_3d.conv5.2.conv1.weight", + "backbone_3d.conv5.2.conv2.weight", + "backbone_3d.conv6.0.0.weight", + "backbone_3d.conv6.1.conv1.weight", + "backbone_3d.conv6.1.conv2.weight", + "backbone_3d.conv6.2.conv1.weight", + "backbone_3d.conv6.2.conv2.weight", + # "backbone_3d.conv_out.0.weight", + "backbone_3d.shared_conv.0.weight", + "dense_head.heads_list.0.center.0.0.weight", + "dense_head.heads_list.0.center.1.weight", + "dense_head.heads_list.0.center_z.0.0.weight", + "dense_head.heads_list.0.center_z.1.weight", + "dense_head.heads_list.0.dim.0.0.weight", + "dense_head.heads_list.0.dim.1.weight", + "dense_head.heads_list.0.rot.0.0.weight", + "dense_head.heads_list.0.rot.1.weight", + "dense_head.heads_list.0.hm.0.0.weight", + "dense_head.heads_list.0.hm.1.weight", + "dense_head.heads_list.1.center.0.0.weight", + "dense_head.heads_list.1.center.1.weight", + "dense_head.heads_list.1.center_z.0.0.weight", + "dense_head.heads_list.1.center_z.1.weight", + "dense_head.heads_list.1.dim.0.0.weight", + "dense_head.heads_list.1.dim.1.weight", + "dense_head.heads_list.1.rot.0.0.weight", + "dense_head.heads_list.1.rot.1.weight", + "dense_head.heads_list.1.hm.0.0.weight", + "dense_head.heads_list.1.hm.1.weight", + "dense_head.heads_list.2.center.0.0.weight", + "dense_head.heads_list.2.center.1.weight", + "dense_head.heads_list.2.center_z.0.0.weight", + "dense_head.heads_list.2.center_z.1.weight", + "dense_head.heads_list.2.dim.0.0.weight", + "dense_head.heads_list.2.dim.1.weight", + "dense_head.heads_list.2.rot.0.0.weight", + "dense_head.heads_list.2.rot.1.weight", + "dense_head.heads_list.2.hm.0.0.weight", + "dense_head.heads_list.2.hm.1.weight", + "dense_head.heads_list.3.center.0.0.weight", + "dense_head.heads_list.3.center.1.weight", + "dense_head.heads_list.3.center_z.0.0.weight", + "dense_head.heads_list.3.center_z.1.weight", + "dense_head.heads_list.3.dim.0.0.weight", + 
"dense_head.heads_list.3.dim.1.weight", + "dense_head.heads_list.3.rot.0.0.weight", + "dense_head.heads_list.3.rot.1.weight", + "dense_head.heads_list.3.hm.0.0.weight", + "dense_head.heads_list.3.hm.1.weight", + "dense_head.heads_list.4.center.0.0.weight", + "dense_head.heads_list.4.center.1.weight", + "dense_head.heads_list.4.center_z.0.0.weight", + "dense_head.heads_list.4.center_z.1.weight", + "dense_head.heads_list.4.dim.0.0.weight", + "dense_head.heads_list.4.dim.1.weight", + "dense_head.heads_list.4.rot.0.0.weight", + "dense_head.heads_list.4.rot.1.weight", + "dense_head.heads_list.4.hm.0.0.weight", + "dense_head.heads_list.4.hm.1.weight", + "dense_head.heads_list.5.center.0.0.weight", + "dense_head.heads_list.5.center.1.weight", + "dense_head.heads_list.5.center_z.0.0.weight", + "dense_head.heads_list.5.center_z.1.weight", + "dense_head.heads_list.5.dim.0.0.weight", + "dense_head.heads_list.5.dim.1.weight", + "dense_head.heads_list.5.rot.0.0.weight", + "dense_head.heads_list.5.rot.1.weight", + "dense_head.heads_list.5.hm.0.0.weight", + "dense_head.heads_list.5.hm.1.weight", + "dense_head.heads_list.0.vel.0.0.weight", + "dense_head.heads_list.0.vel.1.weight", + "dense_head.heads_list.1.vel.0.0.weight", + "dense_head.heads_list.1.vel.1.weight", + "dense_head.heads_list.2.vel.0.0.weight", + "dense_head.heads_list.2.vel.1.weight", + "dense_head.heads_list.3.vel.0.0.weight", + "dense_head.heads_list.3.vel.1.weight", + "dense_head.heads_list.4.vel.0.0.weight", + "dense_head.heads_list.4.vel.1.weight", + "dense_head.heads_list.5.vel.0.0.weight", + "dense_head.heads_list.5.vel.1.weight" +] + +no_squeeze = [ + "dense_head.heads_list.0.center_z.1.weight", + "dense_head.heads_list.0.hm.1.weight", + "dense_head.heads_list.1.center_z.1.weight", + "dense_head.heads_list.2.center_z.1.weight", + "dense_head.heads_list.3.center_z.1.weight", + "dense_head.heads_list.3.hm.1.weight", + "dense_head.heads_list.4.center_z.1.weight", + "dense_head.heads_list.5.center_z.1.weight" +] + + +def convert_weights_2d(key, model): + new_key = key.replace(".weight", ".kernel") + weights = model[key] + oc, kx, ky, ic = weights.shape + + converted_weights = weights.reshape(oc, -1, ic) + + converted_weights = converted_weights.permute(1, 0, 2) + + # find order of dimension + # weight_order = [] + # for weight in converted_weights: + # weight_order.append(torch.sum(weight)) + + # weight_order_rearranged = [] + + if converted_weights.shape[0] == 1: + converted_weights = converted_weights[0] + converted_weights = converted_weights.permute(1,0) + elif converted_weights.shape[0] == 9: + offsets = [ list(range(ky)), list(range(kx))] + offsets = [ + (x * ky + y) + for y in offsets[0] + for x in offsets[1] + ] + offsets = torch.tensor( + offsets, dtype=torch.int64, device=converted_weights.device + ) + converted_weights = converted_weights[offsets] + converted_weights = converted_weights.permute(0,2,1) + # for weight in converted_weights: + # weight_order_rearranged.append(torch.sum(weight)) + + return new_key, converted_weights + +def convert_unit_weight(key, model): + new_key = key.replace(".weight", ".kernel") + weight = model[key] + oc, kx, ky, ic = weight.shape + new_weight = weight.transpose(0,1).reshape(1,ic,oc) + # if key in no_squeeze: + # return new_key, new_weight + # else: + # return new_key, torch.squeeze(new_weight) + return new_key, torch.squeeze(new_weight, [0]) + + +def convert_weights_3d(key, model): + new_key = key.replace(".weight", ".kernel") + weights = model[key] + oc, kx, ky, kz, ic = weights.shape + + 
converted_weights = weights.reshape(oc, -1, ic)
+
+    # [oc, x*y*z, ic] -> [x*y*z, oc, ic]
+    converted_weights = converted_weights.permute(1, 0, 2)
+
+    if converted_weights.shape[0] == 1:
+        converted_weights = converted_weights[0]
+    elif converted_weights.shape[0] == 27:
+        offsets = [list(range(kz)), list(range(ky)), list(range(kx))]
+        kykx = ky * kx
+        offsets = [
+            (x * kykx + y * kx + z)
+            for z in offsets[0]
+            for y in offsets[1]
+            for x in offsets[2]
+        ]
+        offsets = torch.tensor(
+            offsets, dtype=torch.int64, device=converted_weights.device
+        )
+        converted_weights = converted_weights[offsets]
+
+    # [x*y*z, oc, ic] -> [x*y*z, ic, oc]
+    converted_weights = converted_weights.permute(0,2,1)
+
+    return new_key, converted_weights
+
+def convert_voxelnext(ckpt_before, ckpt_after):
+    cp_old = torch.load(ckpt_before, map_location="cpu")
+    try:
+        model = cp_old['model_state']
+    except:
+        model = cp_old
+    new_model = dict()
+
+    for key in model:
+        is_sparseconv_weight = False
+        if key in conv_weights:  # and not re.search(r'lateral_layer', key)
+            # is_sparseconv_weight = len(model[key].shape) > 1 # dimension larger than 1
+            is_sparseconv_weight = True
+
+        if is_sparseconv_weight:
+            if len(model[key].shape) == 5:
+                new_key, converted_weights = convert_weights_3d(key, model)
+                if key == 'backbone_3d.conv_input.0.weight':
+                    # converted_weights = converted_weights[:, :-1, :]
+                    converted_weights = converted_weights[:, :, :]
+            elif np.prod(model[key].shape[1:-1]) == 1:  # kernel size is 1
+                # This is a 2D kernel with a kernel size of 1.
+                new_key, converted_weights = convert_unit_weight(key, model)
+            elif len(model[key].shape) == 4:
+                new_key, converted_weights = convert_weights_2d(key, model)
+            else:
+                new_key = key.replace(".weight", ".kernel")
+                converted_weights = model[key]
+        else:
+            new_key = key
+            converted_weights = model[key]
+
+        new_model[new_key] = converted_weights
+
+    cp_old['model_state'] = new_model
+    torch.save(cp_old, ckpt_after)
+
+
+if __name__ == "__main__":
+    # Example invocation with the author's local paths; adjust them to your own
+    # checkpoint locations. Guarded so that importing this module (as the demo
+    # notebook does) does not trigger a conversion.
+    convert_voxelnext(
+        "/home/yingqi/repo/OpenPCDet/models/VoxelNeXt/voxelnext_nuscenes_kernel1.pth",
+        "/home/yingqi/repo/torchsparse-dev/converted_models/openpcdet/VoxelNeXt/voxelnext_nuscenes_kernel1.pth",
+    )
\ No newline at end of file
diff --git a/examples/openpcdet/demo.ipynb b/examples/openpcdet/demo.ipynb
new file mode 100644
index 0000000..d5424b7
--- /dev/null
+++ b/examples/openpcdet/demo.ipynb
@@ -0,0 +1,650 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TorchSparse for OpenPCDet Plugin demo\n",
+    "You can run the cells below to run the evaluation of TorchSparse integrated OpenPCDet models. \n",
+    "\n",
+    "## Dependencies\n",
+    "- Conda\n",
+    "- OpenPCDet installation: Please follow the [OpenPCDet documentation](https://github.com/open-mmlab/OpenPCDet/blob/master/docs/INSTALL.md). \n",
+    "- Pre-process the datasets required by OpenPCDet ([see here](https://github.com/open-mmlab/OpenPCDet/blob/master/docs/GETTING_STARTED.md)). \n",
+    "- TorchSparse installation.\n",
+    "- Install the TorchSparse plugin for OpenPCDet\n",
+    "  1. Clone this repository\n",
+    "  2. Define the environment variable `PCDET_BASE` to point to the installation path of OpenPCDet.\n",
+    "  3. Go to `examples/openpcdet` and run `pip install -v -e .`\n",
+    "\n",
+    "## Note\n",
+    "1. In some cases, you need to turn off the PyTorch JIT compiler to avoid JIT errors when using the OpenPCDet framework. [This post](https://discuss.pytorch.org/t/turning-pytorch-jit-mode-on-off-dynamically/31288/4) tells you how to turn it off. 
Typically, you just need to go to the `.py` file reporting this error and add the following lines in the import section:\n",
+    "```python\n",
+    "import torch\n",
+    "torch.jit._state.disable()\n",
+    "```\n",
+    "2. There is no way to replace the layers of an OpenPCDet model with `cfg_options` as in `mmdetection3d`, and it also doesn't have the option to register a customized layer. So, for demonstration purposes, the TorchSparse modules are monkey-patched into OpenPCDet when the plugin is imported, and they are selected with the modified configuration files (under `examples/openpcdet/cfgs`) provided in the example. \n",
+    "3. Modify the dataset path in the model config: The dataset config file path in OpenPCDet's model config file is a relative path that is valid only if you are running the evaluation under the `tools` directory. \n",
+    "```yaml\n",
+    "DATA_CONFIG: \n",
+    "    _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml\n",
+    "```\n",
+    "It won't work if the script is being executed somewhere else, and it will raise the error `FileNotFoundError: [Errno 2] No such file or directory: 'cfgs/dataset_configs/kitti_dataset.yaml'`. Thus it needs to be changed to an absolute path to the dataset config in both the model config and the plugin model config. The `DATA_PATH` pointing to kitti_dataset.yaml in the TorchSparse integrated model configs is automatically inferred and updated in `cfgs` during the plugin installation (you are asked to provide the path `PCDET_BASE` as an environment variable). \n",
+    "4. Then, you need to change the data root in OpenPCDet's dataset config file to be the full path of the corresponding dataset root, for the same reason. For example, for Kitti models, you need to change `DATA_PATH: '../data/kitti'` in `OpenPCDet/tools/cfgs/dataset_configs/kitti_dataset.yaml` to point to the absolute path of the dataset base. \n",
+    "\n",
+    "## Steps\n",
+    "1. Install the dependencies. \n",
+    "2. Specify the base paths and model registry.\n",
+    "3. Activate the plugin: go to `OpenPCDet/tools/test.py` and add `import pcdet_plugin` as the last import statement. \n",
+    "4. Run the demo. \n",
+    "5. Print the evaluation results. \n",
+    "\n",
+    "## Available models\n",
+    "- Kitti\n",
+    "  - SECOND\n",
+    "  - PV-RCNN\n",
+    "  - Part-A2\n",
+    "- NuScenes\n",
+    "  - VoxelNeXt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import importlib.util\n",
+    "import sys, os\n",
+    "from pathlib import Path\n",
+    "import subprocess\n",
+    "\n",
+    "# Load the model converter into this notebook. \n",
+    "# Define the relative path to the file\n",
+    "relative_path = \"../converter.py\"\n",
+    "file_path = Path().resolve() / relative_path\n",
+    "\n",
+    "# Add the directory containing the file to sys.path\n",
+    "sys.path.append(str(file_path.parent))\n",
+    "\n",
+    "# Load the module\n",
+    "spec = importlib.util.spec_from_file_location(\"convert_weights\", str(file_path))\n",
+    "converter = importlib.util.module_from_spec(spec)\n",
+    "spec.loader.exec_module(converter)\n",
+    "\n",
+    "converter = getattr(converter, \"convert_weights\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check that the model converter is successfully loaded\n",
+    "print(converter)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Specify the Paths\n",
+    "To run this demo, you need to provide the following paths:\n",
+    "1. 
`openpc_path`: OpenPCDet installation path.\n",
+    "2. `openpc_model_base_path`: Input pretrained weight path. \n",
+    "3. `torchsparse_model_base_path`: Output pretrained weight path.\n",
+    "4. `openpc_cfg_base_path`: OpenPCDet configuration files base path.\n",
+    "    - This configuration is required for the model conversion. Specifically, the converter uses the original configuration file to create a model, identifies the sparse convolution modules, and converts the weights for only those modules. If you installed OpenPCDet from source, this should be the `tools/cfgs` folder in its repo. \n",
+    "5. `torchsparse_cfg_base_path`: The path of the configuration files of TorchSparse integrated models. These configurations are the same as the config files in the OpenPCDet repository, except that certain model layers are replaced with TorchSparse integrated modules. Defaults to the `cfgs` folder in this example folder. \n",
+    "6. Conda environment name: this demo initializes a sub-shell with `subprocess` to execute the evaluation. So you need to specify the name of the Conda environment that you want to use to run the demo. \n",
+    "\n",
+    "For paths 2, 3, 4, and 5, we expect you to organize them by having a base path and putting the checkpoint/configuration files of different models under the same base path. For example, for the input pretrained weight path, the file structure looks like: \n",
+    "\n",
+    "```text\n",
+    "/home/ubuntu/openpc_model_base/ \n",
+    "├── SECOND/ \n",
+    "│   └── SECOND_Checkpoint.pth\n",
+    "├── PV-RCNN/\n",
+    "│   └── PV-RCNN_Checkpoint.pth\n",
+    "└── VoxelNeXt/\n",
+    "    └── VoxelNeXt_Checkpoint.pth\n",
+    "```\n",
+    "In this case, you need to configure `openpc_model_base_path` to be `/home/ubuntu/openpc_model_base` and, in SECOND's registry entry, set `ckpt_before` to `SECOND/SECOND_Checkpoint.pth`. \n",
+    "\n",
+    "In addition to the paths, we also need you to specify:\n",
+    "1. SpConv version of the original model.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "env_name = \"torchsparse\"\n",
+    "\n",
+    "base_paths = {\n",
+    "    'openpc_path': None,\n",
+    "    'openpc_model_base_path': None,\n",
+    "    'torchsparse_model_base_path': os.path.join(os.path.abspath(''), \"converted_models\"),\n",
+    "    'openpc_cfg_base_path': None,\n",
+    "    'torchsparse_cfg_base_path': os.path.join(os.path.abspath(''), \"cfgs\")\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The function to run a single demo is defined below. Based on the configuration dictionary you provide, it converts the model weights and then uses `tools/test.py` in the `OpenPCDet` repo to run the model evaluation. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def openpc_single_demo(registry_entry, base_paths, convert=True):\n", + "\n", + " assert os.path.isdir(base_paths['openpc_model_base_path']), f\"OpenPCDet model base path {base_paths['openpc_model_base_path']} does not exist.\"\n", + " assert os.path.isdir(base_paths['torchsparse_model_base_path']), f\"TorchSparse model base path {base_paths['torchsparse_model_base_path']} does not exist.\"\n", + " assert os.path.isdir(base_paths['openpc_cfg_base_path']), f\"OpenPCDet cfg base path {base_paths['openpc_cfg_base_path']} does not exist.\"\n", + " assert os.path.isdir(base_paths['torchsparse_cfg_base_path']), f\"TorchSparse cfg base path {base_paths['torchsparse_cfg_base_path']} does not exist.\"\n", + " assert os.path.isdir(base_paths['openpc_path']), f\"OpenPCDet path {base_paths['openpc_path']} does not exist.\"\n", + "\n", + " # pre-process paths\n", + " openpc_cfg_path = os.path.join(base_paths['openpc_cfg_base_path'], registry_entry['openpc_cfg_path'])\n", + " torchsparse_cfg_path = os.path.join(base_paths['torchsparse_cfg_base_path'], registry_entry['torchsparse_cfg_path'])\n", + " test_file_path = os.path.join(base_paths['openpc_path'], \"tools/test.py\")\n", + " openpc_model_path = os.path.join(base_paths['openpc_model_base_path'], registry_entry['ckpt_before'])\n", + " assert os.path.isdir(base_paths['torchsparse_model_base_path']), \"Please create the directory for the converted model.\"\n", + " torchsparse_model_path = os.path.join(base_paths['torchsparse_model_base_path'], registry_entry['ckpt_after'])\n", + "\n", + " # convert the model\n", + " if convert:\n", + " parent_dir = os.path.dirname(torchsparse_model_path)\n", + " if not os.path.exists(parent_dir):\n", + " os.makedirs(parent_dir)\n", + " converter(\n", + " ckpt_before=openpc_model_path,\n", + " ckpt_after=torchsparse_model_path,\n", + " cfg_path=openpc_cfg_path,\n", + " v_spconv = registry_entry['v_spconv'],\n", + " framework='openpc'\n", + " )\n", + "\n", + " command = f'bash -c \"conda activate {env_name}; python {test_file_path} --cfg_file {torchsparse_cfg_path} --ckpt {torchsparse_model_path}\"'\n", + " print(command)\n", + " result = subprocess.run(command, capture_output=True, text=True, shell=True, executable='/bin/bash')\n", + " return result # result have .stdout and .stderr attributes to get the output. \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model Evaluation\n", + "### SECOND\n", + "Run a SECOND demo. You can print the evaluation results of the model from the sub-process's `stdout` and `stderr`. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "second_registry = {\n", + " 'ckpt_before': 'SECOND/second_7862.pth',\n", + " 'ckpt_after': 'SECOND/second_7862.pth',\n", + " 'openpc_cfg_path': 'kitti_models/second.yaml',\n", + " 'torchsparse_cfg_path': 'kitti_models/second_plugin.yaml',\n", + " 'v_spconv': 1\n", + "}\n", + "\n", + "second_results = openpc_single_demo(second_registry, base_paths, convert=True)\n", + "print(second_results.stdout)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Printout the evaluation resutls. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(second_results.stderr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Expected Outputs: \n", + "\n", + "```\n", + "2024-08-03 01:31:45,287 INFO *************** Performance of EPOCH 7862 *****************\n", + "2024-08-03 01:31:45,288 INFO Generate label finished(sec_per_example: 0.0071 second).\n", + "2024-08-03 01:31:45,288 INFO recall_roi_0.3: 0.000000\n", + "2024-08-03 01:31:45,288 INFO recall_rcnn_0.3: 0.949140\n", + "2024-08-03 01:31:45,288 INFO recall_roi_0.5: 0.000000\n", + "2024-08-03 01:31:45,288 INFO recall_rcnn_0.5: 0.890933\n", + "2024-08-03 01:31:45,288 INFO recall_roi_0.7: 0.000000\n", + "2024-08-03 01:31:45,288 INFO recall_rcnn_0.7: 0.665509\n", + "2024-08-03 01:31:45,289 INFO Average predicted number of objects(3769 samples): 14.170\n", + "\n", + "2024-08-03 01:32:02,104 INFO Car AP@0.70, 0.70, 0.70:\n", + "bbox AP:90.7803, 89.8974, 89.0423\n", + "bev AP:90.0077, 87.9303, 86.4681\n", + "3d AP:88.6137, 78.6221, 77.1884\n", + "aos AP:90.76, 89.77, 88.82\n", + "Car AP_R40@0.70, 0.70, 0.70:\n", + "bbox AP:95.6299, 94.1655, 91.7658\n", + "bev AP:92.4182, 88.5667, 87.6569\n", + "3d AP:90.5570, 81.6217, 78.5918\n", + "aos AP:95.60, 94.00, 91.52\n", + "Car AP@0.70, 0.50, 0.50:\n", + "bbox AP:90.7803, 89.8974, 89.0423\n", + "bev AP:90.7940, 90.1455, 89.5185\n", + "3d AP:90.7940, 90.0875, 89.3992\n", + "aos AP:90.76, 89.77, 88.82\n", + "Car AP_R40@0.70, 0.50, 0.50:\n", + "bbox AP:95.6299, 94.1655, 91.7658\n", + "bev AP:95.6780, 94.8539, 94.2489\n", + "3d AP:95.6652, 94.7514, 94.0543\n", + "aos AP:95.60, 94.00, 91.52\n", + "Pedestrian AP@0.50, 0.50, 0.50:\n", + "bbox AP:68.8175, 66.3438, 63.3219\n", + "bev AP:62.0900, 56.6800, 53.8592\n", + "3d AP:56.5254, 52.9844, 47.6520\n", + "aos AP:64.72, 61.77, 58.52\n", + "Pedestrian AP_R40@0.50, 0.50, 0.50:\n", + "bbox AP:69.5828, 66.5427, 62.8821\n", + "bev AP:60.8272, 56.6307, 52.1901\n", + "3d AP:55.8311, 51.1306, 46.0872\n", + "aos AP:64.94, 61.34, 57.60\n", + "Pedestrian AP@0.50, 0.25, 0.25:\n", + "bbox AP:68.8175, 66.3438, 63.3219\n", + "bev AP:75.4727, 73.9056, 69.7768\n", + "3d AP:75.4608, 73.8354, 69.6737\n", + "aos AP:64.72, 61.77, 58.52\n", + "Pedestrian AP_R40@0.50, 0.25, 0.25:\n", + "bbox AP:69.5828, 66.5427, 62.8821\n", + "bev AP:76.3837, 74.7549, 70.8290\n", + "3d AP:76.3760, 74.6214, 70.7453\n", + "aos AP:64.94, 61.34, 57.60\n", + "Cyclist AP@0.50, 0.50, 0.50:\n", + "bbox AP:87.5600, 77.0733, 74.3888\n", + "bev AP:84.0752, 70.7383, 65.4639\n", + "3d AP:80.6792, 67.1007, 62.1278\n", + "aos AP:87.42, 76.73, 74.00\n", + "Cyclist AP_R40@0.50, 0.50, 0.50:\n", + "bbox AP:91.4045, 79.0474, 75.4231\n", + "bev AP:88.0865, 71.1796, 66.8857\n", + "3d AP:83.0254, 66.7160, 62.5131\n", + "aos AP:91.24, 78.63, 74.98\n", + "Cyclist AP@0.50, 0.25, 0.25:\n", + "bbox AP:87.5600, 77.0733, 74.3888\n", + "bev AP:86.0192, 76.8562, 72.4265\n", + "3d AP:86.0192, 76.8562, 72.4245\n", + "aos AP:87.42, 76.73, 74.00\n", + "Cyclist AP_R40@0.50, 0.25, 0.25:\n", + "bbox AP:91.4045, 79.0474, 75.4231\n", + "bev AP:90.2871, 77.3294, 73.5617\n", + "3d AP:90.2871, 77.3293, 73.5547\n", + "aos AP:91.24, 78.63, 74.98\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PV-RCNN\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pv_rcnn_registry = {\n", + " 'ckpt_before': 'PV-RCNN/pv_rcnn_8369.pth',\n", + " 
'ckpt_after': 'PV-RCNN/pv_rcnn_8369.pth',\n", + " 'openpc_cfg_path': 'kitti_models/pv_rcnn.yaml',\n", + " 'torchsparse_cfg_path': 'kitti_models/pv_rcnn_plugin.yaml',\n", + " 'v_spconv': 1\n", + "}\n", + "\n", + "pv_rcnn_results = openpc_single_demo(pv_rcnn_registry, base_paths, convert=True)\n", + "print(pv_rcnn_results.stdout)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Printout the evaluation results. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(pv_rcnn_results.stderr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Expected Output: \n", + "\n", + "```\n", + "2024-08-03 01:36:13,184 INFO *************** Performance of EPOCH 8369 *****************\n", + "2024-08-03 01:36:13,184 INFO Generate label finished(sec_per_example: 0.0375 second).\n", + "2024-08-03 01:36:13,184 INFO recall_roi_0.3: 0.968447\n", + "2024-08-03 01:36:13,184 INFO recall_rcnn_0.3: 0.968504\n", + "2024-08-03 01:36:13,184 INFO recall_roi_0.5: 0.928352\n", + "2024-08-03 01:36:13,184 INFO recall_rcnn_0.5: 0.934389\n", + "2024-08-03 01:36:13,184 INFO recall_roi_0.7: 0.716938\n", + "2024-08-03 01:36:13,184 INFO recall_rcnn_0.7: 0.759369\n", + "2024-08-03 01:36:13,186 INFO Average predicted number of objects(3769 samples): 9.206\n", + "\n", + "2024-08-03 01:36:31,071 INFO Car AP@0.70, 0.70, 0.70:\n", + "bbox AP:96.2503, 89.5026, 89.2371\n", + "bev AP:90.0794, 87.9057, 87.4102\n", + "3d AP:89.3399, 83.7288, 78.7346\n", + "aos AP:96.23, 89.39, 89.07\n", + "Car AP_R40@0.70, 0.70, 0.70:\n", + "bbox AP:98.2628, 94.4161, 92.2756\n", + "bev AP:93.0160, 90.3269, 88.5353\n", + "3d AP:92.1238, 84.4006, 82.4978\n", + "aos AP:98.24, 94.26, 92.07\n", + "Car AP@0.70, 0.50, 0.50:\n", + "bbox AP:96.2503, 89.5026, 89.2371\n", + "bev AP:96.2815, 89.4945, 89.2827\n", + "3d AP:96.2358, 89.4761, 89.2522\n", + "aos AP:96.23, 89.39, 89.07\n", + "Car AP_R40@0.70, 0.50, 0.50:\n", + "bbox AP:98.2628, 94.4161, 92.2756\n", + "bev AP:98.2553, 94.5899, 94.4293\n", + "3d AP:98.2372, 94.5246, 94.3279\n", + "aos AP:98.24, 94.26, 92.07\n", + "Pedestrian AP@0.50, 0.50, 0.50:\n", + "bbox AP:73.1750, 68.1068, 64.3705\n", + "bev AP:65.1549, 59.4407, 54.5509\n", + "3d AP:63.2132, 54.8977, 51.9049\n", + "aos AP:67.88, 62.55, 58.76\n", + "Pedestrian AP_R40@0.50, 0.50, 0.50:\n", + "bbox AP:73.6830, 68.3395, 64.3856\n", + "bev AP:65.9740, 58.5465, 54.1692\n", + "3d AP:62.8141, 54.5642, 49.9484\n", + "aos AP:67.88, 62.23, 58.11\n", + "Pedestrian AP@0.50, 0.25, 0.25:\n", + "bbox AP:73.1750, 68.1068, 64.3705\n", + "bev AP:76.3273, 71.9281, 69.4853\n", + "3d AP:76.3142, 71.8653, 69.4437\n", + "aos AP:67.88, 62.55, 58.76\n", + "Pedestrian AP_R40@0.50, 0.25, 0.25:\n", + "bbox AP:73.6830, 68.3395, 64.3856\n", + "bev AP:78.3134, 73.1922, 70.0144\n", + "3d AP:78.2993, 73.0964, 69.9333\n", + "aos AP:67.88, 62.23, 58.11\n", + "Cyclist AP@0.50, 0.50, 0.50:\n", + "bbox AP:96.2518, 81.3697, 76.5945\n", + "bev AP:88.5292, 73.4151, 70.4412\n", + "3d AP:86.2533, 69.8507, 64.5986\n", + "aos AP:96.11, 81.08, 76.25\n", + "Cyclist AP_R40@0.50, 0.50, 0.50:\n", + "bbox AP:97.1791, 82.4359, 78.2700\n", + "bev AP:93.4740, 74.6551, 70.2368\n", + "3d AP:90.8024, 70.5917, 66.0683\n", + "aos AP:97.07, 82.13, 77.92\n", + "Cyclist AP@0.50, 0.25, 0.25:\n", + "bbox AP:96.2518, 81.3697, 76.5945\n", + "bev AP:95.1249, 78.3304, 73.3570\n", + "3d AP:95.1249, 78.3214, 73.2931\n", + "aos AP:96.11, 81.08, 76.25\n", + "Cyclist AP_R40@0.50, 0.25, 0.25:\n", + "bbox 
AP:97.1791, 82.4359, 78.2700\n", + "bev AP:96.2788, 79.1643, 75.9079\n", + "3d AP:96.2788, 79.1586, 75.8420\n", + "aos AP:97.07, 82.13, 77.92\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Part-A2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "part_a2_registry = {\n", + " 'ckpt_before': 'Part-A2/PartA2_7940.pth',\n", + " 'ckpt_after': 'Part-A2/PartA2_7940.pth',\n", + " 'openpc_cfg_path': 'kitti_models/PartA2.yaml',\n", + " 'torchsparse_cfg_path': 'kitti_models/PartA2_plugin.yaml',\n", + " 'v_spconv': 1\n", + "}\n", + "\n", + "part_a2_results = openpc_single_demo(part_a2_registry, base_paths, convert=True)\n", + "print(part_a2_results.stderr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Expected Output: \n", + "\n", + "```\n", + "2024-08-04 22:25:58,225 INFO *************** Performance of EPOCH 7940 *****************\n", + "2024-08-04 22:25:58,225 INFO Generate label finished(sec_per_example: 0.0148 second).\n", + "2024-08-04 22:25:58,225 INFO recall_roi_0.3: 0.970270\n", + "2024-08-04 22:25:58,225 INFO recall_rcnn_0.3: 0.970384\n", + "2024-08-04 22:25:58,225 INFO recall_roi_0.5: 0.930117\n", + "2024-08-04 22:25:58,225 INFO recall_rcnn_0.5: 0.935870\n", + "2024-08-04 22:25:58,225 INFO recall_roi_0.7: 0.710901\n", + "2024-08-04 22:25:58,225 INFO recall_rcnn_0.7: 0.746953\n", + "2024-08-04 22:25:58,227 INFO Average predicted number of objects(3769 samples): 11.203\n", + "\n", + "2024-08-04 22:26:14,556 INFO Car AP@0.70, 0.70, 0.70:\n", + "bbox AP:94.7980, 89.3054, 89.0451\n", + "bev AP:90.2376, 87.9529, 87.5332\n", + "3d AP:89.5301, 79.3967, 78.8623\n", + "aos AP:94.74, 89.15, 88.82\n", + "Car AP_R40@0.70, 0.70, 0.70:\n", + "bbox AP:97.8505, 93.7762, 91.7622\n", + "bev AP:92.8941, 89.9358, 88.3568\n", + "3d AP:92.1135, 82.8972, 82.1563\n", + "aos AP:97.79, 93.58, 91.50\n", + "Car AP@0.70, 0.50, 0.50:\n", + "bbox AP:94.7980, 89.3054, 89.0451\n", + "bev AP:94.7820, 89.3003, 89.0972\n", + "3d AP:94.7357, 89.2693, 89.0323\n", + "aos AP:94.74, 89.15, 88.82\n", + "Car AP_R40@0.70, 0.50, 0.50:\n", + "bbox AP:97.8505, 93.7762, 91.7622\n", + "bev AP:97.7269, 93.9681, 93.9136\n", + "3d AP:97.7006, 93.8868, 93.7499\n", + "aos AP:97.79, 93.58, 91.50\n", + "Pedestrian AP@0.50, 0.50, 0.50:\n", + "bbox AP:75.7000, 71.2206, 67.4326\n", + "bev AP:71.0432, 64.1691, 60.1552\n", + "3d AP:65.8572, 60.2919, 55.6764\n", + "aos AP:73.64, 68.58, 64.55\n", + "Pedestrian AP_R40@0.50, 0.50, 0.50:\n", + "bbox AP:76.2101, 71.9826, 68.6716\n", + "bev AP:70.8884, 64.3507, 59.3637\n", + "3d AP:67.2230, 59.9240, 54.7550\n", + "aos AP:73.94, 69.13, 65.41\n", + "Pedestrian AP@0.50, 0.25, 0.25:\n", + "bbox AP:75.7000, 71.2206, 67.4326\n", + "bev AP:78.8260, 75.3293, 72.8212\n", + "3d AP:78.8474, 75.2618, 72.7057\n", + "aos AP:73.64, 68.58, 64.55\n", + "Pedestrian AP_R40@0.50, 0.25, 0.25:\n", + "bbox AP:76.2101, 71.9826, 68.6716\n", + "bev AP:81.3466, 77.4266, 73.8207\n", + "3d AP:81.4174, 77.2437, 73.6680\n", + "aos AP:73.94, 69.13, 65.41\n", + "Cyclist AP@0.50, 0.50, 0.50:\n", + "bbox AP:89.0268, 77.5214, 76.1501\n", + "bev AP:87.2364, 73.7817, 70.8660\n", + "3d AP:85.6864, 70.1335, 65.5218\n", + "aos AP:88.87, 77.18, 75.69\n", + "Cyclist AP_R40@0.50, 0.50, 0.50:\n", + "bbox AP:94.3109, 80.2203, 77.5290\n", + "bev AP:92.3125, 73.8489, 70.7336\n", + "3d AP:90.5849, 70.2803, 67.1889\n", + "aos AP:94.10, 79.78, 77.05\n", + "Cyclist AP@0.50, 0.25, 0.25:\n", + "bbox AP:89.0268, 77.5214, 
76.1501\n", + "bev AP:87.7328, 77.7907, 73.4163\n", + "3d AP:87.7328, 77.7888, 73.3536\n", + "aos AP:88.87, 77.18, 75.69\n", + "Cyclist AP_R40@0.50, 0.25, 0.25:\n", + "bbox AP:94.3109, 80.2203, 77.5290\n", + "bev AP:92.7067, 78.4131, 75.5830\n", + "3d AP:92.7029, 78.4100, 75.4983\n", + "aos AP:94.10, 79.78, 77.05\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### VoxelNeXt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The weights of VoxelNeXt differs from the previous models because it contains 2D sparse conv layers. We have a specialized converter for it. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from converter_voxelnext import convert_voxelnext\n", + "\n", + "voxelnext_registry = {\n", + " 'ckpt_before': 'VoxelNeXt/voxelnext_nuscenes_kernel1.pth',\n", + " 'ckpt_after': 'VoxelNeXt/voxelnext_nuscenes_kernel1.pth',\n", + " 'openpc_cfg_path': 'nuscenes_models/cbgs_voxel0075_voxelnext.yaml',\n", + " 'torchsparse_cfg_path': 'nuscenes_models/cbgs_voxel0075_voxelnext.yaml',\n", + " 'v_spconv': 2\n", + "}\n", + "\n", + "# pre-process paths\n", + "openpc_cfg_path = os.path.join(base_paths['openpc_cfg_base_path'], voxelnext_registry['openpc_cfg_path'])\n", + "torchsparse_cfg_path = os.path.join(base_paths['torchsparse_cfg_base_path'], voxelnext_registry['torchsparse_cfg_path'])\n", + "test_file_path = os.path.join(base_paths['openpc_path'], \"tools/test.py\")\n", + "openpc_model_path = os.path.join(base_paths['openpc_model_base_path'], voxelnext_registry['ckpt_before'])\n", + "torchsparse_model_path = os.path.join(base_paths['torchsparse_model_base_path'], voxelnext_registry['ckpt_after'])\n", + "\n", + "# convert_voxelnext(\n", + "# ckpt_before=openpc_model_path,\n", + "# ckpt_after=torchsparse_model_path,\n", + "# )\n", + "\n", + "command = f'bash -c \"conda activate torchsparse; python {test_file_path} --cfg_file {torchsparse_cfg_path} --ckpt {torchsparse_model_path}\"'\n", + "voxelnext_results = subprocess.run(command, capture_output=True, text=True, shell=True, executable='/bin/bash')\n", + "\n", + "print(voxelnext_results.stderr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Expected Output: \n", + "\n", + "```\n", + "2024-08-05 00:49:37,228 INFO ----------------Nuscene detection_cvpr_2019 results-----------------\n", + "***car error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.19, 0.16, 0.17, 0.23, 0.20 | 73.06, 84.46, 88.36, 89.65 | mean AP: 0.8388369993093885\n", + "***truck error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.33, 0.18, 0.17, 0.20, 0.22 | 36.22, 55.39, 63.48, 67.13 | mean AP: 0.555570448511005\n", + "***construction_vehicle error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.70, 0.43, 1.22, 0.12, 0.30 | 2.29, 12.86, 27.62, 41.68 | mean AP: 0.21110277861079582\n", + "***bus error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.34, 0.18, 0.10, 0.38, 0.24 | 44.01, 71.78, 81.83, 84.45 | mean AP: 0.7051496836594204\n", + "***trailer error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.60, 0.20, 0.63, 0.16, 0.18 | 7.27, 32.79, 51.76, 60.86 | mean AP: 0.381712143855136\n", + "***barrier error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.20, 0.27, 0.09, nan, nan | 59.00, 69.85, 73.59, 75.16 | mean AP: 0.6940105701286209\n", + "***motorcycle error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 
4.0\n", + "0.20, 0.24, 0.32, 0.31, 0.24 | 55.78, 64.17, 65.33, 65.97 | mean AP: 0.6281188558655376\n", + "***bicycle error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.17, 0.27, 0.48, 0.14, 0.01 | 47.51, 50.35, 50.84, 51.43 | mean AP: 0.5003450990308462\n", + "***pedestrian error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.15, 0.27, 0.47, 0.20, 0.09 | 82.28, 83.92, 85.31, 86.79 | mean AP: 0.8457525135254048\n", + "***traffic_cone error@trans, scale, orient, vel, attr | AP@0.5, 1.0, 2.0, 4.0\n", + "0.14, 0.32, nan, nan, nan | 66.33, 67.87, 69.92, 73.18 | mean AP: 0.6932564816100588\n", + "--------------average performance-------------\n", + "trans_err:\t 0.3013\n", + "scale_err:\t 0.2522\n", + "orient_err:\t 0.4058\n", + "vel_err:\t 0.2169\n", + "attr_err:\t 0.1856\n", + "mAP:\t 0.6054\n", + "NDS:\t 0.6665\n", + "\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "torchsparse", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/openpcdet/pcdet_plugin/__init__.py b/examples/openpcdet/pcdet_plugin/__init__.py new file mode 100644 index 0000000..7df3cd3 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/__init__.py @@ -0,0 +1,33 @@ +from .models import * + +# Apply Monkey Patch +# Monkey Patch here +import pcdet.models.backbones_3d as pcd_backbones_3d +import pcdet.models.backbones_3d.pfe as pcd_pfe +import pcdet.models.backbones_2d.map_to_bev as pcd_map_to_bev +import pcdet.models.dense_heads as pcd_dense_heads +import pcdet.models.roi_heads as pcd_roi_heads + +import pcdet_plugin.models.backbones_3d as pcd_plugin_backbones_3d +import pcdet_plugin.models.backbones_3d.pfe as pcd_plugin_pfe +import pcdet_plugin.models.backbones_3d.unet as pcd_plugin_spconv_unet +import pcdet_plugin.models.backbones_3d.backbone_voxel_next as pcd_plugin_backbone_voxel_next +import pcdet_plugin.models.backbones_2d.map_to_bev as pcd_plugin_map_to_bev +import pcdet_plugin.models.dense_heads.voxel_next_head as pcd_plugin_voxel_next_head +import pcdet_plugin.models.roi_heads.partA2_head as pcd_plugin_partA2_head + +import pcdet_plugin.models.detectors.detector3d_template as pcd_plugin_detector3d_template + +pcd_backbones_3d.__all__['VoxelBackBone8xTS'] = pcd_plugin_backbones_3d.VoxelBackBone8xTS +pcd_backbones_3d.__all__['UNetV2TS'] = pcd_plugin_spconv_unet.UNetV2TS +pcd_backbones_3d.__all__['VoxelResBackBone8xVoxelNeXtTS'] = pcd_plugin_backbone_voxel_next.VoxelResBackBone8xVoxelNeXtTS +pcd_map_to_bev.__all__['HeightCompressionTS'] = pcd_plugin_map_to_bev.HeightCompressionTS +pcd_pfe.__all__['VoxelSetAbstractionTS'] = pcd_plugin_pfe.VoxelSetAbstractionTS +pcd_dense_heads.__all__['VoxelNeXtHeadTS'] = pcd_plugin_voxel_next_head.VoxelNeXtHeadTS +pcd_roi_heads.__all__['PartA2FCHeadTS'] = pcd_plugin_partA2_head.PartA2FCHeadTS + +# Monkey patch the detector 3d template +import pcdet.models.detectors as pcd_detectors + +pcd_detectors.__all__['Detector3DTemplate']._load_state_dict = pcd_plugin_detector3d_template.Detector3DTemplate._load_state_dict +# pcd_detectors.detector3d_template.Detector3DTemplate = pcd_plugin_detector3d_template.Detector3DTemplate \ No newline at 
end of file diff --git a/examples/openpcdet/pcdet_plugin/models/__init__.py b/examples/openpcdet/pcdet_plugin/models/__init__.py new file mode 100644 index 0000000..3b948de --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/__init__.py @@ -0,0 +1,6 @@ +from .backbones_2d import map_to_bev +from .backbones_3d import pfe +from .backbones_3d import backbone3d +from .dense_heads import voxel_next_head +from .detectors import detector3d_template +from .roi_heads import partA2_head \ No newline at end of file diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_2d/__init__.py b/examples/openpcdet/pcdet_plugin/models/backbones_2d/__init__.py new file mode 100644 index 0000000..2743ca3 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_2d/__init__.py @@ -0,0 +1 @@ +from .map_to_bev import height_compression \ No newline at end of file diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/__init__.py b/examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/__init__.py new file mode 100644 index 0000000..52d1a3d --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/__init__.py @@ -0,0 +1,5 @@ +from .height_compression import HeightCompressionTS +# from pcdet.models.backbones_2d.map_to_bev.__init__ import __all__ + +# # Try register a pcdet model this way. +# __all__['HeightCompressionTS'] = HeightCompressionTS diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/height_compression.py b/examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/height_compression.py new file mode 100644 index 0000000..7de8462 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_2d/map_to_bev/height_compression.py @@ -0,0 +1,28 @@ +import torch.nn as nn + +class HeightCompressionTS(nn.Module): + def __init__(self, model_cfg, **kwargs): + super().__init__() + self.model_cfg = model_cfg + self.num_bev_features = self.model_cfg.NUM_BEV_FEATURES + + def forward(self, batch_dict): + """ + Args: + batch_dict: + encoded_spconv_tensor: sparse tensor + Returns: + batch_dict: + spatial_features: + + """ + encoded_spconv_tensor = batch_dict['encoded_spconv_tensor'] + spatial_features = encoded_spconv_tensor.dense() + + N, D, H, W, C = spatial_features.shape + spatial_features = spatial_features.permute(0, 2, 3, 4, 1).contiguous().reshape(N, H, W, C*D).permute(0, 3, 1, 2).contiguous() + + batch_dict['spatial_features'] = spatial_features + batch_dict['spatial_features_stride'] = batch_dict['encoded_spconv_tensor_stride'] + return batch_dict + diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_3d/__init__.py b/examples/openpcdet/pcdet_plugin/models/backbones_3d/__init__.py new file mode 100644 index 0000000..4850ee8 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_3d/__init__.py @@ -0,0 +1,4 @@ +from .backbone3d import VoxelBackBone8xTS +from .unet import UNetV2TS +from .pfe import VoxelSetAbstractionTS +from .backbone_voxel_next import VoxelResBackBone8xVoxelNeXtTS diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone3d.py b/examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone3d.py new file mode 100644 index 0000000..284bc56 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone3d.py @@ -0,0 +1,151 @@ +from functools import partial + +import torch +import torch.nn as nn +import torchsparse +import torchsparse.nn as spnn + +import os, logging + +from pcdet.models.backbones_3d.__init__ import __all__ + +def 
post_act_block_ts(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0, + conv_type='tsconv', norm_fn=None): + + if conv_type == 'tsconv': + conv = spnn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False) + elif conv_type == 'inverseconv': + conv = spnn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, bias=False, transposed=True) + else: + raise NotImplementedError + + m = nn.Sequential( + conv, + norm_fn(out_channels), + spnn.ReLU(), + ) + + return m + + +class VoxelBackBone8xTS(nn.Module): + def __init__(self, model_cfg, input_channels, grid_size, **kwargs): + super().__init__() + self.model_cfg = model_cfg + norm_fn = partial(spnn.BatchNorm, eps=1e-3, momentum=0.01) + + self.sparse_shape = grid_size[::-1] + [1, 0, 0] + + self.conv_input = nn.Sequential( + spnn.Conv3d(input_channels, 16, 3, padding=1, bias=False), + spnn.BatchNorm(16), + spnn.ReLU(), + ) + + block = post_act_block_ts + + self.conv1 = nn.Sequential( + block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'), + ) + + self.conv2 = nn.Sequential( + # [1600, 1408, 41] <- [800, 704, 21] + block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='tsconv'), + block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), + block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), + ) + + self.conv3 = nn.Sequential( + # [800, 704, 21] <- [400, 352, 11] + block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='tsconv'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + ) + + self.conv4 = nn.Sequential( + # [400, 352, 11] <- [200, 176, 5] + block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='tsconv'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), + ) + + last_pad = 0 + last_pad = self.model_cfg.get('last_pad', last_pad) + self.conv_out = nn.Sequential( + # [200, 150, 5] -> [200, 150, 2] + spnn.Conv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad, + bias=False), + norm_fn(128), + spnn.ReLU(), + ) + self.num_point_features = 128 + self.backbone_channels = { + 'x_conv1': 16, + 'x_conv2': 32, + 'x_conv3': 64, + 'x_conv4': 64 + } + + logging.warning('Built VoxelBackBone8x for TorchSparse') + + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: int + vfe_features: (num_voxels, C) + voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx] + Returns: + batch_dict: + encoded_spconv_tensor: sparse tensor + """ + voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords'] + batch_size = batch_dict['batch_size'] + # input_sp_tensor = spconv.SparseConvTensor( + # features=voxel_features, + # indices=voxel_coords.int(), + # spatial_shape=self.sparse_shape, + # batch_size=batch_size + # ) + voxel_coords = voxel_coords.int() + # input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors, self.sparse_shape, batch_size) + spatial_range = (voxel_coords[:, 0].max().item() + 1,) + tuple(self.sparse_shape) + input_sp_tensor = torchsparse.SparseTensor(voxel_features, voxel_coords, spatial_range=spatial_range) # dimension match + + + x = self.conv_input(input_sp_tensor) + + x_conv1 = self.conv1(x) + x_conv2 = self.conv2(x_conv1) + x_conv3 = self.conv3(x_conv2) + x_conv4 = self.conv4(x_conv3) + + # for detection 
head + # [200, 176, 5] -> [200, 176, 2] + out = self.conv_out(x_conv4) + + batch_dict.update({ + 'encoded_spconv_tensor': out, + 'encoded_spconv_tensor_stride': 8 + }) + batch_dict.update({ + 'multi_scale_3d_features': { + 'x_conv1': x_conv1, + 'x_conv2': x_conv2, + 'x_conv3': x_conv3, + 'x_conv4': x_conv4, + } + }) + batch_dict.update({ + 'multi_scale_3d_strides': { + 'x_conv1': 1, + 'x_conv2': 2, + 'x_conv3': 4, + 'x_conv4': 8, + } + }) + + return batch_dict + +__all__['VoxelBackBone8xTS'] = VoxelBackBone8xTS \ No newline at end of file diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone_voxel_next.py b/examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone_voxel_next.py new file mode 100644 index 0000000..9f097c9 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_3d/backbone_voxel_next.py @@ -0,0 +1,279 @@ +from functools import partial +import torch +import torch.nn as nn +import os +from pcdet.utils.spconv_utils import spconv +import torchsparse +import torchsparse.nn as spnn +from torchsparse.utils.tensor_cache import TensorCache +import logging + + +def ts_to_spconv(tensor: torchsparse.SparseTensor, spatial_shape, batch_size): + return spconv.SparseConvTensor( + features=tensor.feats, + indices=tensor.coords, + spatial_shape=spatial_shape, # Take out batch size and channel + batch_size=batch_size + ) + +def spconv_to_ts(tensor: spconv.SparseConvTensor): + spatial_range = None + if len(tensor.indices[0]) == 3: + spatial_range = (tensor.batch_size, ) + tuple(tensor.spatial_shape) + (1, ) + elif len(tensor.indices[0]) == 4: + spatial_range = (tensor.batch_size, ) + (tensor.spatial_shape, ) + else: + raise NotImplementedError("Only 3D and 4D tensors are supported.") + + return torchsparse.SparseTensor( + feats=tensor.features, + coords=tensor.indices, + spatial_range=spatial_range + ) + + +def post_act_block_ts(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0, + conv_type='subm', norm_fn=None): + + if conv_type == 'subm': + conv = spnn.Conv3d(in_channels, out_channels, kernel_size, bias=False) + elif conv_type == 'spconv': + conv = spnn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False) + elif conv_type == 'inverseconv': + conv = spnn.Conv3d(in_channels, out_channels, kernel_size, bias=False, transposed=True) + else: + raise NotImplementedError + + m = nn.Sequential( + conv, + norm_fn(out_channels), + spnn.ReLU(), + ) + + return m + + +class SparseBasicBlockTS(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None): + super(SparseBasicBlockTS, self).__init__() + + assert norm_fn is not None + bias = norm_fn is not None + self.conv1 = spnn.Conv3d( + inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=bias + ) + self.bn1 = nn.BatchNorm1d(planes, eps=1e-3, momentum=0.01) # norm_fn(planes) + self.relu = nn.ReLU() + self.conv2 = spnn.Conv3d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=bias + ) + self.bn2 = nn.BatchNorm1d(planes, eps=1e-3, momentum=0.01) # norm_fn(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out.feats = self.bn1(out.feats) + out.feats = self.relu(out.feats) + + out = self.conv2(out) + out.feats = self.bn2(out.feats) + + if self.downsample is not None: + identity = self.downsample(x) + + out.feats = out.feats + identity.feats + out.feats = self.relu(out.feats) + + return out + 
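+# VoxelResBackBone8xVoxelNeXtTS is the TorchSparse port of OpenPCDet's VoxelResBackBone8xVoxelNeXt:
+# six residual sparse-conv stages (conv1-conv6) run on torchsparse.SparseTensor; the two deepest
+# stages have their coordinates rescaled (x2 and x4) and are concatenated into x_conv4, which
+# bev_out() collapses to a BEV map by summing features that share the same (batch, y, x) index.
+# The 2D conv_out still uses spconv, so the tensor is round-tripped through ts_to_spconv() /
+# spconv_to_ts() around that layer before shared_conv produces the 'encoded_spconv_tensor'
+# consumed by the VoxelNeXt head.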
+class VoxelResBackBone8xVoxelNeXtTS(nn.Module): + def __init__(self, model_cfg, input_channels, grid_size, **kwargs): + super().__init__() + self.model_cfg = model_cfg +# norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + norm_fn = partial(torchsparse.nn.BatchNorm, eps=1e-3, momentum=0.01) + + spconv_kernel_sizes = model_cfg.get('SPCONV_KERNEL_SIZES', [3, 3, 3, 3]) + channels = model_cfg.get('CHANNELS', [16, 32, 64, 128, 128]) + out_channel = model_cfg.get('OUT_CHANNEL', 128) + + self.sparse_shape = grid_size[::-1] + [1, 0, 0] + + self.conv_input = nn.Sequential( + spnn.Conv3d(input_channels, channels[0], 3, padding=1, bias=False), + norm_fn(channels[0]), + spnn.ReLU() + ) + block = post_act_block_ts + + self.conv1 = nn.Sequential( + SparseBasicBlockTS(channels[0], channels[0], norm_fn=norm_fn), + SparseBasicBlockTS(channels[0], channels[0], norm_fn=norm_fn), + ) + + self.conv2 = nn.Sequential( + # [1600, 1408, 41] <- [800, 704, 21] + block(channels[0], channels[1], spconv_kernel_sizes[0], norm_fn=norm_fn, stride=2, padding=int(spconv_kernel_sizes[0]//2), conv_type='spconv'), + SparseBasicBlockTS(channels[1], channels[1], norm_fn=norm_fn), + SparseBasicBlockTS(channels[1], channels[1], norm_fn=norm_fn), + ) + + self.conv3 = nn.Sequential( + # [800, 704, 21] <- [400, 352, 11] + block(channels[1], channels[2], spconv_kernel_sizes[1], norm_fn=norm_fn, stride=2, padding=int(spconv_kernel_sizes[1]//2), conv_type='spconv'), + SparseBasicBlockTS(channels[2], channels[2], norm_fn=norm_fn), + SparseBasicBlockTS(channels[2], channels[2], norm_fn=norm_fn), + ) + + self.conv4 = nn.Sequential( + # [400, 352, 11] <- [200, 176, 6] + block(channels[2], channels[3], spconv_kernel_sizes[2], norm_fn=norm_fn, stride=2, padding=int(spconv_kernel_sizes[2]//2), conv_type='spconv'), + SparseBasicBlockTS(channels[3], channels[3], norm_fn=norm_fn), + SparseBasicBlockTS(channels[3], channels[3], norm_fn=norm_fn), + ) + + self.conv5 = nn.Sequential( + # [200, 176, 6] <- [100, 88, 3] + block(channels[3], channels[4], spconv_kernel_sizes[3], norm_fn=norm_fn, stride=2, padding=int(spconv_kernel_sizes[3]//2), conv_type='spconv'), + SparseBasicBlockTS(channels[4], channels[4], norm_fn=norm_fn), + SparseBasicBlockTS(channels[4], channels[4], norm_fn=norm_fn), + ) + + self.conv6 = nn.Sequential( + # [200, 176, 6] <- [100, 88, 3] + block(channels[4], channels[4], spconv_kernel_sizes[3], norm_fn=norm_fn, stride=2, padding=int(spconv_kernel_sizes[3]//2), conv_type='spconv'), + SparseBasicBlockTS(channels[4], channels[4], norm_fn=norm_fn), + SparseBasicBlockTS(channels[4], channels[4], norm_fn=norm_fn), + ) + # self.conv_out = nn.Sequential( + # # [200, 150, 5] -> [200, 150, 2], zyx in SpConv -> zyx in TS + # spnn.Conv3d(channels[3], out_channel, (1,3,3), padding=1, bias=False), + # norm_fn(out_channel), + # spnn.ReLU(), + # ) + + self.conv_out = spconv.SparseSequential( + spconv.SparseConv2d(channels[3], out_channel, 3, stride=1, padding=1, bias=False), + nn.BatchNorm1d(out_channel,eps=1e-3, momentum=0.01), + nn.ReLU(), + ) + + self.shared_conv = nn.Sequential( + spnn.Conv3d(out_channel, out_channel, kernel_size=(3,3,1), bias=True), + spnn.BatchNorm(out_channel), + spnn.ReLU(), + ) + + self.forward_ret_dict = {} + self.num_point_features = out_channel + self.backbone_channels = { + 'x_conv1': channels[0], + 'x_conv2': channels[1], + 'x_conv3': channels[2], + 'x_conv4': channels[3] + } + logging.info('VoxelNeXt TorchSparse') + + def bev_out(self, x_conv): + features_cat = x_conv.feats + indices_cat = x_conv.coords[:, 
[0, 2, 3]] + spatial_shape = x_conv.spatial_range[1:-1] + channels = x_conv.spatial_range[-1] + + indices_unique, _inv = torch.unique(indices_cat, dim=0, return_inverse=True) + features_unique = features_cat.new_zeros((indices_unique.shape[0], features_cat.shape[1])) + features_unique.index_add_(0, _inv, features_cat) + + x_out = torchsparse.SparseTensor( + feats=features_unique, + coords=indices_unique, + spatial_range=(x_conv.spatial_range[0],) + spatial_shape + (channels, ) + ) + return x_out + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: int + vfe_features: (num_voxels, C) + voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx] + Returns: + batch_dict: + encoded_spconv_tensor: sparse tensor + """ + voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords'] + batch_size = batch_dict['batch_size'] + input_sp_tensor = torchsparse.SparseTensor( + feats=voxel_features, + coords=voxel_coords.int(), + spatial_range=(batch_size, ) + tuple(self.sparse_shape) + (4, ) + ) + # input_sp_tensor = spconv.SparseConvTensor( + # features=voxel_features, + # indices=voxel_coords.int(), + # spatial_shape=self.sparse_shape, + # batch_size=batch_size + # ) + x = self.conv_input(input_sp_tensor) + + x_conv1 = self.conv1(x) + x_conv2 = self.conv2(x_conv1) + x_conv3 = self.conv3(x_conv2) + x_conv4 = self.conv4(x_conv3) + x_conv5 = self.conv5(x_conv4) + x_conv6 = self.conv6(x_conv5) + + # print("x_conv6.feats") + # print(x_conv6.feats) + + x_conv5.coords[:, 1:] *= 2 + x_conv6.coords[:, 1:] *= 4 + x_conv4.feats = torch.cat([x_conv4.feats, x_conv5.feats, x_conv6.feats]) + x_conv4.coords = torch.cat([x_conv4.coords, x_conv5.coords, x_conv6.coords]) + + + out = self.bev_out(x_conv4) + + # out = ts_to_spconv(out) + # out.spatal_range = out.spatial_range + (1, ) + + out = ts_to_spconv(out, spatial_shape=out.spatial_range[2:], batch_size=out.spatial_range[0]) + out = self.conv_out(out) + out = spconv_to_ts(out) + out.coords = torch.cat((out.coords, torch.zeros((out.coords.shape[0], 1)).to('cuda')), dim=1).int() + out = self.shared_conv(out) + + batch_dict.update({ + 'encoded_spconv_tensor': out, + 'encoded_spconv_tensor_stride': 8 + }) + batch_dict.update({ + 'multi_scale_3d_features': { + 'x_conv1': x_conv1, + 'x_conv2': x_conv2, + 'x_conv3': x_conv3, + 'x_conv4': x_conv4, + } + }) + batch_dict.update({ + 'multi_scale_3d_strides': { + 'x_conv1': 1, + 'x_conv2': 2, + 'x_conv3': 4, + 'x_conv4': 8, + } + }) + # print("out.feats") + # print(out.feats) + + # print("batch_dict") + # print(batch_dict) + + return batch_dict diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_3d/pfe.py b/examples/openpcdet/pcdet_plugin/models/backbones_3d/pfe.py new file mode 100644 index 0000000..1d12056 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_3d/pfe.py @@ -0,0 +1,413 @@ +import torch +import math, os +import numpy as np +import torch.nn as nn +import torchsparse.nn as spnn + +from pcdet.ops.pointnet2.pointnet2_stack import pointnet2_modules as pointnet2_stack_modules +from pcdet.ops.pointnet2.pointnet2_stack import pointnet2_utils as pointnet2_stack_utils +from pcdet.utils import common_utils + +import logging + +def bilinear_interpolate_torch(im, x, y): + """ + Args: + im: (H, W, C) [y, x] + x: (N) + y: (N) + + Returns: + + """ + x0 = torch.floor(x).long() + x1 = x0 + 1 + + y0 = torch.floor(y).long() + y1 = y0 + 1 + + x0 = torch.clamp(x0, 0, im.shape[1] - 1) + x1 = torch.clamp(x1, 0, im.shape[1] - 1) + y0 = torch.clamp(y0, 0, 
im.shape[0] - 1) + y1 = torch.clamp(y1, 0, im.shape[0] - 1) + + Ia = im[y0, x0] + Ib = im[y1, x0] + Ic = im[y0, x1] + Id = im[y1, x1] + + wa = (x1.type_as(x) - x) * (y1.type_as(y) - y) + wb = (x1.type_as(x) - x) * (y - y0.type_as(y)) + wc = (x - x0.type_as(x)) * (y1.type_as(y) - y) + wd = (x - x0.type_as(x)) * (y - y0.type_as(y)) + ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd) + return ans + + +def sample_points_with_roi(rois, points, sample_radius_with_roi, num_max_points_of_part=200000): + """ + Args: + rois: (M, 7 + C) + points: (N, 3) + sample_radius_with_roi: + num_max_points_of_part: + + Returns: + sampled_points: (N_out, 3) + """ + if points.shape[0] < num_max_points_of_part: + distance = (points[:, None, :] - rois[None, :, 0:3]).norm(dim=-1) + min_dis, min_dis_roi_idx = distance.min(dim=-1) + roi_max_dim = (rois[min_dis_roi_idx, 3:6] / 2).norm(dim=-1) + point_mask = min_dis < roi_max_dim + sample_radius_with_roi + else: + start_idx = 0 + point_mask_list = [] + while start_idx < points.shape[0]: + distance = (points[start_idx:start_idx + num_max_points_of_part, None, :] - rois[None, :, 0:3]).norm(dim=-1) + min_dis, min_dis_roi_idx = distance.min(dim=-1) + roi_max_dim = (rois[min_dis_roi_idx, 3:6] / 2).norm(dim=-1) + cur_point_mask = min_dis < roi_max_dim + sample_radius_with_roi + point_mask_list.append(cur_point_mask) + start_idx += num_max_points_of_part + point_mask = torch.cat(point_mask_list, dim=0) + + sampled_points = points[:1] if point_mask.sum() == 0 else points[point_mask, :] + + return sampled_points, point_mask + +def sector_fps(points, num_sampled_points, num_sectors): + """ + Args: + points: (N, 3) + num_sampled_points: int + num_sectors: int + + Returns: + sampled_points: (N_out, 3) + """ + sector_size = np.pi * 2 / num_sectors + point_angles = torch.atan2(points[:, 1], points[:, 0]) + np.pi + sector_idx = (point_angles / sector_size).floor().clamp(min=0, max=num_sectors) + xyz_points_list = [] + xyz_batch_cnt = [] + num_sampled_points_list = [] + for k in range(num_sectors): + mask = (sector_idx == k) + cur_num_points = mask.sum().item() + if cur_num_points > 0: + xyz_points_list.append(points[mask]) + xyz_batch_cnt.append(cur_num_points) + ratio = cur_num_points / points.shape[0] + num_sampled_points_list.append( + min(cur_num_points, math.ceil(ratio * num_sampled_points)) + ) + + if len(xyz_batch_cnt) == 0: + xyz_points_list.append(points) + xyz_batch_cnt.append(len(points)) + num_sampled_points_list.append(num_sampled_points) + print(f'Warning: empty sector points detected in SectorFPS: points.shape={points.shape}') + + xyz = torch.cat(xyz_points_list, dim=0) + xyz_batch_cnt = torch.tensor(xyz_batch_cnt, device=points.device).int() + sampled_points_batch_cnt = torch.tensor(num_sampled_points_list, device=points.device).int() + + sampled_pt_idxs = pointnet2_stack_utils.stack_farthest_point_sample( + xyz.contiguous(), xyz_batch_cnt, sampled_points_batch_cnt + ).long() + + sampled_points = xyz[sampled_pt_idxs] + + return sampled_points + +class VoxelSetAbstractionTS(nn.Module): + def __init__(self, model_cfg, voxel_size, point_cloud_range, num_bev_features=None, + num_rawpoint_features=None, **kwargs): + super().__init__() + self.model_cfg = model_cfg + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + + SA_cfg = self.model_cfg.SA_LAYER + + self.SA_layers = nn.ModuleList() + self.SA_layer_names = [] + self.downsample_times_map = {} + c_in = 0 + for src_name in 
self.model_cfg.FEATURES_SOURCE: + if src_name in ['bev', 'raw_points']: + continue + self.downsample_times_map[src_name] = SA_cfg[src_name].DOWNSAMPLE_FACTOR + + if SA_cfg[src_name].get('INPUT_CHANNELS', None) is None: + input_channels = SA_cfg[src_name].MLPS[0][0] \ + if isinstance(SA_cfg[src_name].MLPS[0], list) else SA_cfg[src_name].MLPS[0] + else: + input_channels = SA_cfg[src_name]['INPUT_CHANNELS'] + + cur_layer, cur_num_c_out = pointnet2_stack_modules.build_local_aggregation_module( + input_channels=input_channels, config=SA_cfg[src_name] + ) + self.SA_layers.append(cur_layer) + self.SA_layer_names.append(src_name) + + c_in += cur_num_c_out + + if 'bev' in self.model_cfg.FEATURES_SOURCE: + c_bev = num_bev_features + c_in += c_bev + + if 'raw_points' in self.model_cfg.FEATURES_SOURCE: + self.SA_rawpoints, cur_num_c_out = pointnet2_stack_modules.build_local_aggregation_module( + input_channels=num_rawpoint_features - 3, config=SA_cfg['raw_points'] + ) + + c_in += cur_num_c_out + + self.vsa_point_feature_fusion = nn.Sequential( + nn.Linear(c_in, self.model_cfg.NUM_OUTPUT_FEATURES, bias=False), + nn.BatchNorm1d(self.model_cfg.NUM_OUTPUT_FEATURES), + nn.ReLU(), + ) + self.num_point_features = self.model_cfg.NUM_OUTPUT_FEATURES + self.num_point_features_before_fusion = c_in + logging.warning("Built VoxelSetAbstraction TorchSparse") + + def interpolate_from_bev_features(self, keypoints, bev_features, batch_size, bev_stride): + """ + Args: + keypoints: (N1 + N2 + ..., 4) + bev_features: (B, C, H, W) + batch_size: + bev_stride: + + Returns: + point_bev_features: (N1 + N2 + ..., C) + """ + x_idxs = (keypoints[:, 1] - self.point_cloud_range[0]) / self.voxel_size[0] + y_idxs = (keypoints[:, 2] - self.point_cloud_range[1]) / self.voxel_size[1] + + x_idxs = x_idxs / bev_stride + y_idxs = y_idxs / bev_stride + + point_bev_features_list = [] + for k in range(batch_size): + bs_mask = (keypoints[:, 0] == k) + + cur_x_idxs = x_idxs[bs_mask] + cur_y_idxs = y_idxs[bs_mask] + cur_bev_features = bev_features[k].permute(1, 2, 0) # (H, W, C) + point_bev_features = bilinear_interpolate_torch(cur_bev_features, cur_x_idxs, cur_y_idxs) + point_bev_features_list.append(point_bev_features) + + point_bev_features = torch.cat(point_bev_features_list, dim=0) # (N1 + N2 + ..., C) + return point_bev_features + + def sectorized_proposal_centric_sampling(self, roi_boxes, points): + """ + Args: + roi_boxes: (M, 7 + C) + points: (N, 3) + + Returns: + sampled_points: (N_out, 3) + """ + + sampled_points, _ = sample_points_with_roi( + rois=roi_boxes, points=points, + sample_radius_with_roi=self.model_cfg.SPC_SAMPLING.SAMPLE_RADIUS_WITH_ROI, + num_max_points_of_part=self.model_cfg.SPC_SAMPLING.get('NUM_POINTS_OF_EACH_SAMPLE_PART', 200000) + ) + sampled_points = sector_fps( + points=sampled_points, num_sampled_points=self.model_cfg.NUM_KEYPOINTS, + num_sectors=self.model_cfg.SPC_SAMPLING.NUM_SECTORS + ) + return sampled_points + + def get_sampled_points(self, batch_dict): + """ + Args: + batch_dict: + + Returns: + keypoints: (N1 + N2 + ..., 4), where 4 indicates [bs_idx, x, y, z] + """ + batch_size = batch_dict['batch_size'] + if self.model_cfg.POINT_SOURCE == 'raw_points': + src_points = batch_dict['points'][:, 1:4] + batch_indices = batch_dict['points'][:, 0].long() + elif self.model_cfg.POINT_SOURCE == 'voxel_centers': + src_points = common_utils.get_voxel_centers( + batch_dict['voxel_coords'][:, 1:4], + downsample_times=1, + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + batch_indices = 
batch_dict['voxel_coords'][:, 0].long() + else: + raise NotImplementedError + keypoints_list = [] + for bs_idx in range(batch_size): + bs_mask = (batch_indices == bs_idx) + sampled_points = src_points[bs_mask].unsqueeze(dim=0) # (1, N, 3) + if self.model_cfg.SAMPLE_METHOD == 'FPS': + cur_pt_idxs = pointnet2_stack_utils.farthest_point_sample( + sampled_points[:, :, 0:3].contiguous(), self.model_cfg.NUM_KEYPOINTS + ).long() + + if sampled_points.shape[1] < self.model_cfg.NUM_KEYPOINTS: + times = int(self.model_cfg.NUM_KEYPOINTS / sampled_points.shape[1]) + 1 + non_empty = cur_pt_idxs[0, :sampled_points.shape[1]] + cur_pt_idxs[0] = non_empty.repeat(times)[:self.model_cfg.NUM_KEYPOINTS] + + keypoints = sampled_points[0][cur_pt_idxs[0]].unsqueeze(dim=0) + + elif self.model_cfg.SAMPLE_METHOD == 'SPC': + cur_keypoints = self.sectorized_proposal_centric_sampling( + roi_boxes=batch_dict['rois'][bs_idx], points=sampled_points[0] + ) + bs_idxs = cur_keypoints.new_ones(cur_keypoints.shape[0]) * bs_idx + keypoints = torch.cat((bs_idxs[:, None], cur_keypoints), dim=1) + else: + raise NotImplementedError + + keypoints_list.append(keypoints) + + keypoints = torch.cat(keypoints_list, dim=0) # (B, M, 3) or (N1 + N2 + ..., 4) + if len(keypoints.shape) == 3: + batch_idx = torch.arange(batch_size, device=keypoints.device).view(-1, 1).repeat(1, keypoints.shape[1]).view(-1, 1) + keypoints = torch.cat((batch_idx.float(), keypoints.view(-1, 3)), dim=1) + + return keypoints + + @staticmethod + def aggregate_keypoint_features_from_one_source( + batch_size, aggregate_func, xyz, xyz_features, xyz_bs_idxs, new_xyz, new_xyz_batch_cnt, + filter_neighbors_with_roi=False, radius_of_neighbor=None, num_max_points_of_part=200000, rois=None + ): + """ + + Args: + aggregate_func: + xyz: (N, 3) + xyz_features: (N, C) + xyz_bs_idxs: (N) + new_xyz: (M, 3) + new_xyz_batch_cnt: (batch_size), [N1, N2, ...] + + filter_neighbors_with_roi: True/False + radius_of_neighbor: float + num_max_points_of_part: int + rois: (batch_size, num_rois, 7 + C) + Returns: + + """ + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + if filter_neighbors_with_roi: + point_features = torch.cat((xyz, xyz_features), dim=-1) if xyz_features is not None else xyz + point_features_list = [] + for bs_idx in range(batch_size): + bs_mask = (xyz_bs_idxs == bs_idx) + _, valid_mask = sample_points_with_roi( + rois=rois[bs_idx], points=xyz[bs_mask], + sample_radius_with_roi=radius_of_neighbor, num_max_points_of_part=num_max_points_of_part, + ) + point_features_list.append(point_features[bs_mask][valid_mask]) + xyz_batch_cnt[bs_idx] = valid_mask.sum() + + valid_point_features = torch.cat(point_features_list, dim=0) + xyz = valid_point_features[:, 0:3] + xyz_features = valid_point_features[:, 3:] if xyz_features is not None else None + else: + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = (xyz_bs_idxs == bs_idx).sum() + + pooled_points, pooled_features = aggregate_func( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz, + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=xyz_features.contiguous(), + ) + return pooled_features + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + keypoints: (B, num_keypoints, 3) + multi_scale_3d_features: { + 'x_conv4': ... + } + points: optional (N, 1 + 3 + C) [bs_idx, x, y, z, ...] 
+ spatial_features: optional + spatial_features_stride: optional + + Returns: + point_features: (N, C) + point_coords: (N, 4) + + """ + keypoints = self.get_sampled_points(batch_dict) + + point_features_list = [] + if 'bev' in self.model_cfg.FEATURES_SOURCE: + point_bev_features = self.interpolate_from_bev_features( + keypoints, batch_dict['spatial_features'], batch_dict['batch_size'], + bev_stride=batch_dict['spatial_features_stride'] + ) + point_features_list.append(point_bev_features) + + batch_size = batch_dict['batch_size'] + + new_xyz = keypoints[:, 1:4].contiguous() + new_xyz_batch_cnt = new_xyz.new_zeros(batch_size).int() + for k in range(batch_size): + new_xyz_batch_cnt[k] = (keypoints[:, 0] == k).sum() + + if 'raw_points' in self.model_cfg.FEATURES_SOURCE: + raw_points = batch_dict['points'] + + pooled_features = self.aggregate_keypoint_features_from_one_source( + batch_size=batch_size, aggregate_func=self.SA_rawpoints, + xyz=raw_points[:, 1:4], + xyz_features=raw_points[:, 4:].contiguous() if raw_points.shape[1] > 4 else None, + xyz_bs_idxs=raw_points[:, 0], + new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt, + filter_neighbors_with_roi=self.model_cfg.SA_LAYER['raw_points'].get('FILTER_NEIGHBOR_WITH_ROI', False), + radius_of_neighbor=self.model_cfg.SA_LAYER['raw_points'].get('RADIUS_OF_NEIGHBOR_WITH_ROI', None), + rois=batch_dict.get('rois', None) + ) + point_features_list.append(pooled_features) + + for k, src_name in enumerate(self.SA_layer_names): + cur_coords = batch_dict['multi_scale_3d_features'][src_name].coords + cur_features = batch_dict['multi_scale_3d_features'][src_name].feats.contiguous() + + xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], downsample_times=self.downsample_times_map[src_name], + voxel_size=self.voxel_size, point_cloud_range=self.point_cloud_range + ) + + pooled_features = self.aggregate_keypoint_features_from_one_source( + batch_size=batch_size, aggregate_func=self.SA_layers[k], + xyz=xyz.contiguous(), xyz_features=cur_features, xyz_bs_idxs=cur_coords[:, 0], + new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt, + filter_neighbors_with_roi=self.model_cfg.SA_LAYER[src_name].get('FILTER_NEIGHBOR_WITH_ROI', False), + radius_of_neighbor=self.model_cfg.SA_LAYER[src_name].get('RADIUS_OF_NEIGHBOR_WITH_ROI', None), + rois=batch_dict.get('rois', None) + ) + + point_features_list.append(pooled_features) + + point_features = torch.cat(point_features_list, dim=-1) + + batch_dict['point_features_before_fusion'] = point_features.view(-1, point_features.shape[-1]) + point_features = self.vsa_point_feature_fusion(point_features.view(-1, point_features.shape[-1])) + + batch_dict['point_features'] = point_features # (BxN, C) + batch_dict['point_coords'] = keypoints # (BxN, 4) + return batch_dict + diff --git a/examples/openpcdet/pcdet_plugin/models/backbones_3d/unet.py b/examples/openpcdet/pcdet_plugin/models/backbones_3d/unet.py new file mode 100644 index 0000000..0a19492 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/backbones_3d/unet.py @@ -0,0 +1,222 @@ +from functools import partial + +import torch +import torch.nn as nn + +from pcdet.utils.spconv_utils import spconv +from pcdet.utils import common_utils +from .backbone3d import post_act_block_ts + +import os + +import torchsparse.nn as spnn +import torchsparse + +def ts_replace_feature(x: torchsparse.SparseTensor, features): + x.feats = features + return x + +class SparseBasicBlockTS(spconv.SparseModule): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, 
downsample=None, indice_key=None, norm_fn=None): + super(SparseBasicBlockTS, self).__init__() + self.conv1 = spnn.Conv3d( + inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn1 = norm_fn(planes) + self.relu = spnn.ReLU() + self.conv2 = spnn.Conv3d( + planes, planes, kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = norm_fn(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: torchsparse.SparseTensor): + identity = x.feats + + assert x.feats.dim() == 2, 'x.features.dim()=%d' % x.features.dim() + + out: torchsparse.SparseTensor = self.conv1(x) + # out = ts_replace_feature(out, self.bn1(out.feats)) + # out = ts_replace_feature(out, self.relu(out.feats)) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + # out = ts_replace_feature(out, self.bn2(out.feats)) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out = ts_replace_feature(out, out.feats + identity) + # out = ts_replace_feature(out, self.relu(out.feats)) + out = self.relu(out) + return out + + +class UNetV2TS(nn.Module): + """ + Sparse Convolution based UNet for point-wise feature learning. + Reference Paper: https://arxiv.org/abs/1907.03670 (Shaoshuai Shi, et. al) + From Points to Parts: 3D Object Detection from Point Cloud with Part-aware and Part-aggregation Network + """ + + def __init__(self, model_cfg, input_channels, grid_size, voxel_size, point_cloud_range, **kwargs): + super().__init__() + self.model_cfg = model_cfg + self.sparse_shape = grid_size[::-1] + [1, 0, 0] + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + + norm_fn = partial(spnn.BatchNorm, eps=1e-3, momentum=0.01) + + self.conv_input = nn.Sequential( + spnn.Conv3d(input_channels, 16, 3, padding=1, bias=False), + norm_fn(16), + spnn.ReLU(), + ) + block = post_act_block_ts + + self.conv1 = nn.Sequential( + block(16, 16, 3, norm_fn=norm_fn, padding=1), + ) + + self.conv2 = nn.Sequential( + # [1600, 1408, 41] <- [800, 704, 21] + block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1), + block(32, 32, 3, norm_fn=norm_fn, padding=1), + block(32, 32, 3, norm_fn=norm_fn, padding=1), + ) + + self.conv3 = spconv.SparseSequential( + # [800, 704, 21] <- [400, 352, 11] + block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3'), # Notice that conv_type='spconv' is replaced + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + ) + + self.conv4 = spconv.SparseSequential( + # [400, 352, 11] <- [200, 176, 5] + block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1)), + block(64, 64, 3, norm_fn=norm_fn, padding=1), + block(64, 64, 3, norm_fn=norm_fn, padding=1), + ) + + if self.model_cfg.get('RETURN_ENCODED_TENSOR', True): + last_pad = self.model_cfg.get('last_pad', 0) + + self.conv_out = nn.Sequential( + # [200, 150, 5] -> [200, 150, 2] + spnn.Conv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad, + bias=False), + norm_fn(128), + spnn.ReLU(), + ) + else: + self.conv_out = None + + # decoder + # [400, 352, 11] <- [200, 176, 5] + self.conv_up_t4 = SparseBasicBlockTS(64, 64, indice_key='subm4', norm_fn=norm_fn) + self.conv_up_m4 = block(128, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4') + self.inv_conv4 = block(64, 64, 3, stride=2, norm_fn=norm_fn, indice_key='spconv4', conv_type='inverseconv') + + # [800, 704, 21] <- [400, 352, 11] + self.conv_up_t3 = SparseBasicBlockTS(64, 64, 
indice_key='subm3', norm_fn=norm_fn) + self.conv_up_m3 = block(128, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3') + self.inv_conv3 = block(64, 32, 3, stride=2, norm_fn=norm_fn, indice_key='spconv3', conv_type='inverseconv') + + # [1600, 1408, 41] <- [800, 704, 21] + self.conv_up_t2 = SparseBasicBlockTS(32, 32, indice_key='subm2', norm_fn=norm_fn) + self.conv_up_m2 = block(64, 32, 3, norm_fn=norm_fn, indice_key='subm2') + self.inv_conv2 = block(32, 16, 3, stride=2, norm_fn=norm_fn, indice_key='spconv2', conv_type='inverseconv') + + # [1600, 1408, 41] <- [1600, 1408, 41] + self.conv_up_t1 = SparseBasicBlockTS(16, 16, indice_key='subm1', norm_fn=norm_fn) + self.conv_up_m1 = block(32, 16, 3, norm_fn=norm_fn, indice_key='subm1') + + self.conv5 = nn.Sequential( + block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1') + ) + self.num_point_features = 16 + + def UR_block_forward(self, x_lateral, x_bottom, conv_t, conv_m, conv_inv): + x_trans = conv_t(x_lateral) + x = x_trans + x = ts_replace_feature(x, torch.cat((x_bottom.feats, x_trans.feats), dim=1)) # was dim 0 + x_m = conv_m(x) + x = self.channel_reduction(x, x_m.feats.shape[1]) + x = ts_replace_feature(x, x_m.feats + x.feats) + x = conv_inv(x) + return x + + @staticmethod + def channel_reduction(x: torchsparse.SparseTensor, out_channels): + """ + Args: + x: x.features (N, C1) + out_channels: C2 + + Returns: + + """ + features = x.feats + n, in_channels = features.shape + assert (in_channels % out_channels == 0) and (in_channels >= out_channels) + + x = ts_replace_feature(x, features.view(n, out_channels, -1).sum(dim=2)) + return x + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: int + vfe_features: (num_voxels, C) + voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx] + Returns: + batch_dict: + encoded_spconv_tensor: sparse tensor + point_features: (N, C) + """ + voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords'] + batch_size = batch_dict['batch_size'] + input_sp_tensor = torchsparse.SparseTensor( + feats=voxel_features, + coords=voxel_coords.int(), + spatial_range=(batch_size,) + tuple(self.sparse_shape) + ) + x = self.conv_input(input_sp_tensor) + + x_conv1 = self.conv1(x) + x_conv2 = self.conv2(x_conv1) + x_conv3 = self.conv3(x_conv2) + x_conv4 = self.conv4(x_conv3) + + if self.conv_out is not None: + # for detection head + # [200, 176, 5] -> [200, 176, 2] + out = self.conv_out(x_conv4) + batch_dict['encoded_spconv_tensor'] = out + batch_dict['encoded_spconv_tensor_stride'] = 8 + + # for segmentation head + # [400, 352, 11] <- [200, 176, 5] + x_up4 = self.UR_block_forward(x_conv4, x_conv4, self.conv_up_t4, self.conv_up_m4, self.inv_conv4) + # [800, 704, 21] <- [400, 352, 11] + x_up3 = self.UR_block_forward(x_conv3, x_up4, self.conv_up_t3, self.conv_up_m3, self.inv_conv3) + # [1600, 1408, 41] <- [800, 704, 21] + x_up2 = self.UR_block_forward(x_conv2, x_up3, self.conv_up_t2, self.conv_up_m2, self.inv_conv2) + # [1600, 1408, 41] <- [1600, 1408, 41] + x_up1 = self.UR_block_forward(x_conv1, x_up2, self.conv_up_t1, self.conv_up_m1, self.conv5) + + batch_dict['point_features'] = x_up1.feats + point_coords = common_utils.get_voxel_centers( + x_up1.coords[:, 1:], downsample_times=1, voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + batch_dict['point_coords'] = torch.cat((x_up1.coords[:, 0:1].float(), point_coords), dim=1) + return batch_dict diff --git a/examples/openpcdet/pcdet_plugin/models/dense_heads/__init__.py 
b/examples/openpcdet/pcdet_plugin/models/dense_heads/__init__.py new file mode 100644 index 0000000..f279d37 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/dense_heads/__init__.py @@ -0,0 +1 @@ +from .voxel_next_head import VoxelNeXtHeadTS diff --git a/examples/openpcdet/pcdet_plugin/models/dense_heads/voxel_next_head.py b/examples/openpcdet/pcdet_plugin/models/dense_heads/voxel_next_head.py new file mode 100644 index 0000000..b164d5d --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/dense_heads/voxel_next_head.py @@ -0,0 +1,574 @@ +import numpy as np +import torch +import torch.nn as nn +from torch.nn.init import kaiming_normal_ +from pcdet.models.model_utils import centernet_utils +from pcdet.models.model_utils import model_nms_utils +from pcdet.utils import loss_utils +from pcdet.utils.spconv_utils import spconv +import copy +from easydict import EasyDict +import torchsparse.nn as spnn +import os + +class seqz(nn.Module): + def forward(x): + return x.squeeze() + +class SeparateHeadTS(nn.Module): + def __init__(self, input_channels, sep_head_dict, kernel_size, init_bias=-2.19, use_bias=False): + super().__init__() + self.sep_head_dict = sep_head_dict + + for cur_name in self.sep_head_dict: + output_channels = self.sep_head_dict[cur_name]['out_channels'] + num_conv = self.sep_head_dict[cur_name]['num_conv'] + + fc_list = [] + for k in range(num_conv - 1): + fc_list.append( + nn.Sequential( + spnn.Conv3d(input_channels, input_channels, kernel_size=(1, kernel_size,kernel_size), padding=int(kernel_size//2), bias=use_bias), + spnn.BatchNorm(input_channels), + spnn.ReLU(), + ) + ) + fc_list.append( + spnn.Conv3d(input_channels, output_channels, kernel_size=1, bias=True), + ) + fc = nn.Sequential(*fc_list) + if 'hm' in cur_name: + fc[-1].bias.data.fill_(init_bias) + else: + for m in fc.modules(): + if isinstance(m, spnn.Conv3d): + kaiming_normal_(m.kernel.data) + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias, 0) + + self.__setattr__(cur_name, fc) + + def forward(self, x): + ret_dict = {} + for cur_name in self.sep_head_dict: + ret_dict[cur_name] = self.__getattr__(cur_name)(x).feats + + return ret_dict + + +class VoxelNeXtHeadTS(nn.Module): + def __init__(self, model_cfg, input_channels, num_class, class_names, grid_size, point_cloud_range, voxel_size, + predict_boxes_when_training=False): + super().__init__() + self.model_cfg = model_cfg + self.num_class = num_class + self.grid_size = grid_size + self.point_cloud_range = torch.Tensor(point_cloud_range).cuda() + self.voxel_size = torch.Tensor(voxel_size).cuda() + self.feature_map_stride = self.model_cfg.TARGET_ASSIGNER_CONFIG.get('FEATURE_MAP_STRIDE', None) + + self.class_names = class_names + self.class_names_each_head = [] + self.class_id_mapping_each_head = [] + self.gaussian_ratio = self.model_cfg.get('GAUSSIAN_RATIO', 1) + self.gaussian_type = self.model_cfg.get('GAUSSIAN_TYPE', ['nearst', 'gt_center']) + # The iou branch is only used for Waymo dataset + self.iou_branch = self.model_cfg.get('IOU_BRANCH', False) + if self.iou_branch: + self.rectifier = self.model_cfg.get('RECTIFIER') + nms_configs = self.model_cfg.POST_PROCESSING.NMS_CONFIG + self.nms_configs = [EasyDict(NMS_TYPE=nms_configs.NMS_TYPE, + NMS_THRESH=nms_configs.NMS_THRESH[i], + NMS_PRE_MAXSIZE=nms_configs.NMS_PRE_MAXSIZE[i], + NMS_POST_MAXSIZE=nms_configs.NMS_POST_MAXSIZE[i]) for i in range(num_class)] + + self.double_flip = self.model_cfg.get('DOUBLE_FLIP', False) + for cur_class_names in self.model_cfg.CLASS_NAMES_EACH_HEAD: + 
self.class_names_each_head.append([x for x in cur_class_names if x in class_names]) + cur_class_id_mapping = torch.from_numpy(np.array( + [self.class_names.index(x) for x in cur_class_names if x in class_names] + )).cuda() + self.class_id_mapping_each_head.append(cur_class_id_mapping) + + total_classes = sum([len(x) for x in self.class_names_each_head]) + assert total_classes == len(self.class_names), f'class_names_each_head={self.class_names_each_head}' + + kernel_size_head = self.model_cfg.get('KERNEL_SIZE_HEAD', 3) + + self.heads_list = nn.ModuleList() + self.separate_head_cfg = self.model_cfg.SEPARATE_HEAD_CFG + for idx, cur_class_names in enumerate(self.class_names_each_head): + cur_head_dict = copy.deepcopy(self.separate_head_cfg.HEAD_DICT) + cur_head_dict['hm'] = dict(out_channels=len(cur_class_names), num_conv=self.model_cfg.NUM_HM_CONV) + self.heads_list.append( + SeparateHeadTS( + input_channels=self.model_cfg.get('SHARED_CONV_CHANNEL', 128), + sep_head_dict=cur_head_dict, + kernel_size=kernel_size_head, + init_bias=-2.19, + use_bias=self.model_cfg.get('USE_BIAS_BEFORE_NORM', False), + ) + ) + self.predict_boxes_when_training = predict_boxes_when_training + self.forward_ret_dict = {} + self.build_losses() + + def build_losses(self): + self.add_module('hm_loss_func', loss_utils.FocalLossSparse()) + self.add_module('reg_loss_func', loss_utils.RegLossSparse()) + if self.iou_branch: + self.add_module('crit_iou', loss_utils.IouLossSparse()) + self.add_module('crit_iou_reg', loss_utils.IouRegLossSparse()) + + def assign_targets(self, gt_boxes, num_voxels, spatial_indices, spatial_shape): + """ + Args: + gt_boxes: (B, M, 8) + Returns: + """ + target_assigner_cfg = self.model_cfg.TARGET_ASSIGNER_CONFIG + + batch_size = gt_boxes.shape[0] + ret_dict = { + 'heatmaps': [], + 'target_boxes': [], + 'inds': [], + 'masks': [], + 'heatmap_masks': [], + 'gt_boxes': [] + } + + all_names = np.array(['bg', *self.class_names]) + for idx, cur_class_names in enumerate(self.class_names_each_head): + heatmap_list, target_boxes_list, inds_list, masks_list, gt_boxes_list = [], [], [], [], [] + for bs_idx in range(batch_size): + cur_gt_boxes = gt_boxes[bs_idx] + gt_class_names = all_names[cur_gt_boxes[:, -1].cpu().long().numpy()] + + gt_boxes_single_head = [] + + for idx, name in enumerate(gt_class_names): + if name not in cur_class_names: + continue + temp_box = cur_gt_boxes[idx] + temp_box[-1] = cur_class_names.index(name) + 1 + gt_boxes_single_head.append(temp_box[None, :]) + + if len(gt_boxes_single_head) == 0: + gt_boxes_single_head = cur_gt_boxes[:0, :] + else: + gt_boxes_single_head = torch.cat(gt_boxes_single_head, dim=0) + + heatmap, ret_boxes, inds, mask = self.assign_target_of_single_head( + num_classes=len(cur_class_names), gt_boxes=gt_boxes_single_head, + num_voxels=num_voxels[bs_idx], spatial_indices=spatial_indices[bs_idx], + spatial_shape=spatial_shape, + feature_map_stride=target_assigner_cfg.FEATURE_MAP_STRIDE, + num_max_objs=target_assigner_cfg.NUM_MAX_OBJS, + gaussian_overlap=target_assigner_cfg.GAUSSIAN_OVERLAP, + min_radius=target_assigner_cfg.MIN_RADIUS, + ) + heatmap_list.append(heatmap.to(gt_boxes_single_head.device)) + target_boxes_list.append(ret_boxes.to(gt_boxes_single_head.device)) + inds_list.append(inds.to(gt_boxes_single_head.device)) + masks_list.append(mask.to(gt_boxes_single_head.device)) + gt_boxes_list.append(gt_boxes_single_head[:, :-1]) + + ret_dict['heatmaps'].append(torch.cat(heatmap_list, dim=1).permute(1, 0)) + 
ret_dict['target_boxes'].append(torch.stack(target_boxes_list, dim=0)) + ret_dict['inds'].append(torch.stack(inds_list, dim=0)) + ret_dict['masks'].append(torch.stack(masks_list, dim=0)) + ret_dict['gt_boxes'].append(gt_boxes_list) + + return ret_dict + + def distance(self, voxel_indices, center): + distances = ((voxel_indices - center.unsqueeze(0))**2).sum(-1) + return distances + + def assign_target_of_single_head( + self, num_classes, gt_boxes, num_voxels, spatial_indices, spatial_shape, feature_map_stride, num_max_objs=500, + gaussian_overlap=0.1, min_radius=2 + ): + """ + Args: + gt_boxes: (N, 8) + feature_map_size: (2), [x, y] + + Returns: + + """ + heatmap = gt_boxes.new_zeros(num_classes, num_voxels) + + ret_boxes = gt_boxes.new_zeros((num_max_objs, gt_boxes.shape[-1] - 1 + 1)) + inds = gt_boxes.new_zeros(num_max_objs).long() + mask = gt_boxes.new_zeros(num_max_objs).long() + + x, y, z = gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2] + coord_x = (x - self.point_cloud_range[0]) / self.voxel_size[0] / feature_map_stride + coord_y = (y - self.point_cloud_range[1]) / self.voxel_size[1] / feature_map_stride + + coord_x = torch.clamp(coord_x, min=0, max=spatial_shape[1] - 0.5) # bugfixed: 1e-6 does not work for center.int() + coord_y = torch.clamp(coord_y, min=0, max=spatial_shape[0] - 0.5) # + + center = torch.cat((coord_x[:, None], coord_y[:, None]), dim=-1) + center_int = center.int() + center_int_float = center_int.float() + + dx, dy, dz = gt_boxes[:, 3], gt_boxes[:, 4], gt_boxes[:, 5] + dx = dx / self.voxel_size[0] / feature_map_stride + dy = dy / self.voxel_size[1] / feature_map_stride + + radius = centernet_utils.gaussian_radius(dx, dy, min_overlap=gaussian_overlap) + radius = torch.clamp_min(radius.int(), min=min_radius) + + for k in range(min(num_max_objs, gt_boxes.shape[0])): + if dx[k] <= 0 or dy[k] <= 0: + continue + + if not (0 <= center_int[k][0] <= spatial_shape[1] and 0 <= center_int[k][1] <= spatial_shape[0]): + continue + + cur_class_id = (gt_boxes[k, -1] - 1).long() + distance = self.distance(spatial_indices, center[k]) + inds[k] = distance.argmin() + mask[k] = 1 + + if 'gt_center' in self.gaussian_type: + centernet_utils.draw_gaussian_to_heatmap_voxels(heatmap[cur_class_id], distance, radius[k].item() * self.gaussian_ratio) + + if 'nearst' in self.gaussian_type: + centernet_utils.draw_gaussian_to_heatmap_voxels(heatmap[cur_class_id], self.distance(spatial_indices, spatial_indices[inds[k]]), radius[k].item() * self.gaussian_ratio) + + ret_boxes[k, 0:2] = center[k] - spatial_indices[inds[k]][:2] + ret_boxes[k, 2] = z[k] + ret_boxes[k, 3:6] = gt_boxes[k, 3:6].log() + ret_boxes[k, 6] = torch.cos(gt_boxes[k, 6]) + ret_boxes[k, 7] = torch.sin(gt_boxes[k, 6]) + if gt_boxes.shape[1] > 8: + ret_boxes[k, 8:] = gt_boxes[k, 7:-1] + + return heatmap, ret_boxes, inds, mask + + def sigmoid(self, x): + y = torch.clamp(x.sigmoid(), min=1e-4, max=1 - 1e-4) + return y + + def get_loss(self): + pred_dicts = self.forward_ret_dict['pred_dicts'] + target_dicts = self.forward_ret_dict['target_dicts'] + batch_index = self.forward_ret_dict['batch_index'] + + tb_dict = {} + loss = 0 + batch_indices = self.forward_ret_dict['voxel_indices'][:, 0] + spatial_indices = self.forward_ret_dict['voxel_indices'][:, 1:] + + for idx, pred_dict in enumerate(pred_dicts): + pred_dict['hm'] = self.sigmoid(pred_dict['hm']) + hm_loss = self.hm_loss_func(pred_dict['hm'], target_dicts['heatmaps'][idx]) + hm_loss *= self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['cls_weight'] + + target_boxes = 
target_dicts['target_boxes'][idx] + pred_boxes = torch.cat([pred_dict[head_name] for head_name in self.separate_head_cfg.HEAD_ORDER], dim=1) + + reg_loss = self.reg_loss_func( + pred_boxes, target_dicts['masks'][idx], target_dicts['inds'][idx], target_boxes, batch_index + ) + loc_loss = (reg_loss * reg_loss.new_tensor(self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['code_weights'])).sum() + loc_loss = loc_loss * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['loc_weight'] + tb_dict['hm_loss_head_%d' % idx] = hm_loss.item() + tb_dict['loc_loss_head_%d' % idx] = loc_loss.item() + if self.iou_branch: + batch_box_preds = self._get_predicted_boxes(pred_dict, spatial_indices) + pred_boxes_for_iou = batch_box_preds.detach() + iou_loss = self.crit_iou(pred_dict['iou'], target_dicts['masks'][idx], target_dicts['inds'][idx], + pred_boxes_for_iou, target_dicts['gt_boxes'][idx], batch_indices) + + iou_reg_loss = self.crit_iou_reg(batch_box_preds, target_dicts['masks'][idx], target_dicts['inds'][idx], + target_dicts['gt_boxes'][idx], batch_indices) + iou_weight = self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['iou_weight'] if 'iou_weight' in self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS else self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['loc_weight'] + iou_reg_loss = iou_reg_loss * iou_weight #self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['loc_weight'] + + loss += (hm_loss + loc_loss + iou_loss + iou_reg_loss) + tb_dict['iou_loss_head_%d' % idx] = iou_loss.item() + tb_dict['iou_reg_loss_head_%d' % idx] = iou_reg_loss.item() + else: + loss += hm_loss + loc_loss + + tb_dict['rpn_loss'] = loss.item() + return loss, tb_dict + + def _get_predicted_boxes(self, pred_dict, spatial_indices): + center = pred_dict['center'] + center_z = pred_dict['center_z'] + #dim = pred_dict['dim'].exp() + dim = torch.exp(torch.clamp(pred_dict['dim'], min=-5, max=5)) + rot_cos = pred_dict['rot'][:, 0].unsqueeze(dim=1) + rot_sin = pred_dict['rot'][:, 1].unsqueeze(dim=1) + angle = torch.atan2(rot_sin, rot_cos) + xs = (spatial_indices[:, 1:2] + center[:, 0:1]) * self.feature_map_stride * self.voxel_size[0] + self.point_cloud_range[0] + ys = (spatial_indices[:, 0:1] + center[:, 1:2]) * self.feature_map_stride * self.voxel_size[1] + self.point_cloud_range[1] + + box_part_list = [xs, ys, center_z, dim, angle] + pred_box = torch.cat((box_part_list), dim=-1) + return pred_box + + def rotate_class_specific_nms_iou(self, boxes, scores, iou_preds, labels, rectifier, nms_configs): + """ + :param boxes: (N, 5) [x, y, z, l, w, h, theta] + :param scores: (N) + :param thresh: + :return: + """ + assert isinstance(rectifier, list) + + box_preds_list, scores_list, labels_list = [], [], [] + for cls in range(self.num_class): + mask = labels == cls + boxes_cls = boxes[mask] + scores_cls = torch.pow(scores[mask], 1 - rectifier[cls]) * torch.pow(iou_preds[mask].squeeze(-1), rectifier[cls]) + labels_cls = labels[mask] + + selected, selected_scores = model_nms_utils.class_agnostic_nms(box_scores=scores_cls, box_preds=boxes_cls, + nms_config=nms_configs[cls], score_thresh=None) + + box_preds_list.append(boxes_cls[selected]) + scores_list.append(scores_cls[selected]) + labels_list.append(labels_cls[selected]) + + return torch.cat(box_preds_list, dim=0), torch.cat(scores_list, dim=0), torch.cat(labels_list, dim=0) + + def merge_double_flip(self, pred_dict, batch_size, voxel_indices, spatial_shape): + # spatial_shape (Z, Y, X) + pred_dict['hm'] = pred_dict['hm'].sigmoid() + pred_dict['dim'] = pred_dict['dim'].exp() + + batch_indices = voxel_indices[:, 0] + spatial_indices = voxel_indices[:, 1:] 
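+        # Descriptive note (added for clarity): each real sample is expanded into four
+        # test-time-augmentation copies (original, Y-flip, X-flip, XY-flip) occupying
+        # consecutive batch indices. The loop below undoes each flip on the voxel indices
+        # and on the center/rot/vel predictions, deduplicates voxels that then coincide via
+        # torch.unique + index_add_, and the accumulated features are later averaged by `counts`.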
+ + pred_dict_ = {k: [] for k in pred_dict.keys()} + counts = [] + spatial_indices_ = [] + for bs_idx in range(batch_size): + spatial_indices_batch = [] + pred_dict_batch = {k: [] for k in pred_dict.keys()} + for i in range(4): + bs_indices = batch_indices == (bs_idx * 4 + i) + if i in [1, 3]: + spatial_indices[bs_indices, 0] = spatial_shape[0] - spatial_indices[bs_indices, 0] + if i in [2, 3]: + spatial_indices[bs_indices, 1] = spatial_shape[1] - spatial_indices[bs_indices, 1] + + if i == 1: + pred_dict['center'][bs_indices, 1] = - pred_dict['center'][bs_indices, 1] + pred_dict['rot'][bs_indices, 1] *= -1 + pred_dict['vel'][bs_indices, 1] *= -1 + + if i == 2: + pred_dict['center'][bs_indices, 0] = - pred_dict['center'][bs_indices, 0] + pred_dict['rot'][bs_indices, 0] *= -1 + pred_dict['vel'][bs_indices, 0] *= -1 + + if i == 3: + pred_dict['center'][bs_indices, 0] = - pred_dict['center'][bs_indices, 0] + pred_dict['center'][bs_indices, 1] = - pred_dict['center'][bs_indices, 1] + + pred_dict['rot'][bs_indices, 1] *= -1 + pred_dict['rot'][bs_indices, 0] *= -1 + + pred_dict['vel'][bs_indices] *= -1 + + spatial_indices_batch.append(spatial_indices[bs_indices]) + + for k in pred_dict.keys(): + pred_dict_batch[k].append(pred_dict[k][bs_indices]) + + spatial_indices_batch = torch.cat(spatial_indices_batch) + + spatial_indices_unique, _inv, count = torch.unique(spatial_indices_batch, dim=0, return_inverse=True, + return_counts=True) + spatial_indices_.append(spatial_indices_unique) + counts.append(count) + for k in pred_dict.keys(): + pred_dict_batch[k] = torch.cat(pred_dict_batch[k]) + features_unique = pred_dict_batch[k].new_zeros( + (spatial_indices_unique.shape[0], pred_dict_batch[k].shape[1])) + features_unique.index_add_(0, _inv, pred_dict_batch[k]) + pred_dict_[k].append(features_unique) + + for k in pred_dict.keys(): + pred_dict_[k] = torch.cat(pred_dict_[k]) + counts = torch.cat(counts).unsqueeze(-1).float() + voxel_indices_ = torch.cat([torch.cat( + [torch.full((indices.shape[0], 1), i, device=indices.device, dtype=indices.dtype), indices], dim=1 + ) for i, indices in enumerate(spatial_indices_)]) + + batch_hm = pred_dict_['hm'] + batch_center = pred_dict_['center'] + batch_center_z = pred_dict_['center_z'] + batch_dim = pred_dict_['dim'] + batch_rot_cos = pred_dict_['rot'][:, 0].unsqueeze(dim=1) + batch_rot_sin = pred_dict_['rot'][:, 1].unsqueeze(dim=1) + batch_vel = pred_dict_['vel'] if 'vel' in self.separate_head_cfg.HEAD_ORDER else None + + batch_hm /= counts + batch_center /= counts + batch_center_z /= counts + batch_dim /= counts + batch_rot_cos /= counts + batch_rot_sin /= counts + + if not batch_vel is None: + batch_vel /= counts + + return batch_hm, batch_center, batch_center_z, batch_dim, batch_rot_cos, batch_rot_sin, batch_vel, None, voxel_indices_ + + def generate_predicted_boxes(self, batch_size, pred_dicts, voxel_indices, spatial_shape): + post_process_cfg = self.model_cfg.POST_PROCESSING + post_center_limit_range = torch.tensor(post_process_cfg.POST_CENTER_LIMIT_RANGE).cuda().float() + + ret_dict = [{ + 'pred_boxes': [], + 'pred_scores': [], + 'pred_labels': [], + 'pred_ious': [], + } for k in range(batch_size)] + for idx, pred_dict in enumerate(pred_dicts): + if self.double_flip: + batch_hm, batch_center, batch_center_z, batch_dim, batch_rot_cos, batch_rot_sin, batch_vel, batch_iou, voxel_indices_ = \ + self.merge_double_flip(pred_dict, batch_size, voxel_indices.clone(), spatial_shape) + else: + batch_hm = pred_dict['hm'].sigmoid() + batch_center = pred_dict['center'] + 
batch_center_z = pred_dict['center_z'] + batch_dim = pred_dict['dim'].exp() + batch_rot_cos = pred_dict['rot'][:, 0].unsqueeze(dim=1) + batch_rot_sin = pred_dict['rot'][:, 1].unsqueeze(dim=1) + batch_iou = (pred_dict['iou'] + 1) * 0.5 if self.iou_branch else None + batch_vel = pred_dict['vel'] if 'vel' in self.separate_head_cfg.HEAD_ORDER else None + voxel_indices_ = voxel_indices[:,:-1] + + final_pred_dicts = centernet_utils.decode_bbox_from_voxels_nuscenes( + batch_size=batch_size, indices=voxel_indices_, + obj=batch_hm, + rot_cos=batch_rot_cos, + rot_sin=batch_rot_sin, + center=batch_center, center_z=batch_center_z, + dim=batch_dim, vel=batch_vel, iou=batch_iou, + point_cloud_range=self.point_cloud_range, voxel_size=self.voxel_size, + feature_map_stride=self.feature_map_stride, + K=post_process_cfg.MAX_OBJ_PER_SAMPLE, + #circle_nms=(post_process_cfg.NMS_CONFIG.NMS_TYPE == 'circle_nms'), + score_thresh=post_process_cfg.SCORE_THRESH, + post_center_limit_range=post_center_limit_range + ) + + for k, final_dict in enumerate(final_pred_dicts): + final_dict['pred_labels'] = self.class_id_mapping_each_head[idx][final_dict['pred_labels'].long()] + if not self.iou_branch: + selected, selected_scores = model_nms_utils.class_agnostic_nms( + box_scores=final_dict['pred_scores'], box_preds=final_dict['pred_boxes'], + nms_config=post_process_cfg.NMS_CONFIG, + score_thresh=None + ) + + final_dict['pred_boxes'] = final_dict['pred_boxes'][selected] + final_dict['pred_scores'] = selected_scores + final_dict['pred_labels'] = final_dict['pred_labels'][selected] + + ret_dict[k]['pred_boxes'].append(final_dict['pred_boxes']) + ret_dict[k]['pred_scores'].append(final_dict['pred_scores']) + ret_dict[k]['pred_labels'].append(final_dict['pred_labels']) + ret_dict[k]['pred_ious'].append(final_dict['pred_ious']) + + for k in range(batch_size): + pred_boxes = torch.cat(ret_dict[k]['pred_boxes'], dim=0) + pred_scores = torch.cat(ret_dict[k]['pred_scores'], dim=0) + pred_labels = torch.cat(ret_dict[k]['pred_labels'], dim=0) + if self.iou_branch: + pred_ious = torch.cat(ret_dict[k]['pred_ious'], dim=0) + pred_boxes, pred_scores, pred_labels = self.rotate_class_specific_nms_iou(pred_boxes, pred_scores, pred_ious, pred_labels, self.rectifier, self.nms_configs) + + ret_dict[k]['pred_boxes'] = pred_boxes + ret_dict[k]['pred_scores'] = pred_scores + ret_dict[k]['pred_labels'] = pred_labels + 1 + + return ret_dict + + @staticmethod + def reorder_rois_for_refining(batch_size, pred_dicts): + num_max_rois = max([len(cur_dict['pred_boxes']) for cur_dict in pred_dicts]) + num_max_rois = max(1, num_max_rois) # at least one faked rois to avoid error + pred_boxes = pred_dicts[0]['pred_boxes'] + + rois = pred_boxes.new_zeros((batch_size, num_max_rois, pred_boxes.shape[-1])) + roi_scores = pred_boxes.new_zeros((batch_size, num_max_rois)) + roi_labels = pred_boxes.new_zeros((batch_size, num_max_rois)).long() + + for bs_idx in range(batch_size): + num_boxes = len(pred_dicts[bs_idx]['pred_boxes']) + + rois[bs_idx, :num_boxes, :] = pred_dicts[bs_idx]['pred_boxes'] + roi_scores[bs_idx, :num_boxes] = pred_dicts[bs_idx]['pred_scores'] + roi_labels[bs_idx, :num_boxes] = pred_dicts[bs_idx]['pred_labels'] + return rois, roi_scores, roi_labels + + def _get_voxel_infos(self, x): + spatial_shape = x.spatial_range[1:] # Might need to include channels + voxel_indices = x.coords + # print(x.coords) + spatial_indices = [] + num_voxels = [] + batch_size = x.spatial_range[0] + batch_index = voxel_indices[:, 0] + + for bs_idx in range(batch_size): + 
batch_inds = batch_index==bs_idx + # print(voxel_indices) + # spatial_indices.append(voxel_indices[batch_inds][:, [2, 1]]) + + spatial_indices.append(voxel_indices[batch_inds][:, [2, 1]]) + num_voxels.append(batch_inds.sum()) + + return spatial_shape, batch_index, voxel_indices, spatial_indices, num_voxels + + def forward(self, data_dict): + x = data_dict['encoded_spconv_tensor'] + + spatial_shape, batch_index, voxel_indices, spatial_indices, num_voxels = self._get_voxel_infos(x) + self.forward_ret_dict['batch_index'] = batch_index + + pred_dicts = [] + for head in self.heads_list: + pred_dicts.append( + head(x) + ) + + if self.training: + target_dict = self.assign_targets( + data_dict['gt_boxes'], num_voxels, spatial_indices, spatial_shape + ) + self.forward_ret_dict['target_dicts'] = target_dict + + self.forward_ret_dict['pred_dicts'] = pred_dicts + self.forward_ret_dict['voxel_indices'] = voxel_indices + + if not self.training or self.predict_boxes_when_training: + if self.double_flip: + data_dict['batch_size'] = data_dict['batch_size'] // 4 + pred_dicts = self.generate_predicted_boxes( + data_dict['batch_size'], + pred_dicts, voxel_indices, spatial_shape + ) + + if self.predict_boxes_when_training: + rois, roi_scores, roi_labels = self.reorder_rois_for_refining(data_dict['batch_size'], pred_dicts) + data_dict['rois'] = rois + data_dict['roi_scores'] = roi_scores + data_dict['roi_labels'] = roi_labels + data_dict['has_class_labels'] = True + else: + data_dict['final_box_dicts'] = pred_dicts + + return data_dict diff --git a/examples/openpcdet/pcdet_plugin/models/detectors/__init__.py b/examples/openpcdet/pcdet_plugin/models/detectors/__init__.py new file mode 100644 index 0000000..3cb2fc2 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/detectors/__init__.py @@ -0,0 +1 @@ +from .detector3d_template import Detector3DTemplate \ No newline at end of file diff --git a/examples/openpcdet/pcdet_plugin/models/detectors/detector3d_template.py b/examples/openpcdet/pcdet_plugin/models/detectors/detector3d_template.py new file mode 100644 index 0000000..732a1bf --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/detectors/detector3d_template.py @@ -0,0 +1,401 @@ +import os + +import torch +import torch.nn as nn +import numpy as np +from pcdet.ops.iou3d_nms import iou3d_nms_utils +from pcdet.utils.spconv_utils import find_all_spconv_keys +from pcdet.models import backbones_2d, backbones_3d, dense_heads, roi_heads +from pcdet.models.backbones_2d import map_to_bev +from pcdet.models.backbones_3d import pfe, vfe +from pcdet.models.model_utils import model_nms_utils + +class Detector3DTemplate(nn.Module): + def __init__(self, model_cfg, num_class, dataset): + super().__init__() + self.model_cfg = model_cfg + self.num_class = num_class + self.dataset = dataset + self.class_names = dataset.class_names + self.register_buffer('global_step', torch.LongTensor(1).zero_()) + + self.module_topology = [ + 'vfe', 'backbone_3d', 'map_to_bev_module', 'pfe', + 'backbone_2d', 'dense_head', 'point_head', 'roi_head' + ] + + @property + def mode(self): + return 'TRAIN' if self.training else 'TEST' + + def update_global_step(self): + self.global_step += 1 + + def build_networks(self): + model_info_dict = { + 'module_list': [], + 'num_rawpoint_features': self.dataset.point_feature_encoder.num_point_features, + 'num_point_features': self.dataset.point_feature_encoder.num_point_features, + 'grid_size': self.dataset.grid_size, + 'point_cloud_range': self.dataset.point_cloud_range, + 'voxel_size': 
self.dataset.voxel_size, + 'depth_downsample_factor': self.dataset.depth_downsample_factor + } + for module_name in self.module_topology: + module, model_info_dict = getattr(self, 'build_%s' % module_name)( + model_info_dict=model_info_dict + ) + self.add_module(module_name, module) + return model_info_dict['module_list'] + # @Yingqi: Where the model is built + def build_vfe(self, model_info_dict): + if self.model_cfg.get('VFE', None) is None: + return None, model_info_dict + + vfe_module = vfe.__all__[self.model_cfg.VFE.NAME]( + model_cfg=self.model_cfg.VFE, + num_point_features=model_info_dict['num_rawpoint_features'], + point_cloud_range=model_info_dict['point_cloud_range'], + voxel_size=model_info_dict['voxel_size'], + grid_size=model_info_dict['grid_size'], + depth_downsample_factor=model_info_dict['depth_downsample_factor'] + ) + model_info_dict['num_point_features'] = vfe_module.get_output_feature_dim() + model_info_dict['module_list'].append(vfe_module) + return vfe_module, model_info_dict + + def build_backbone_3d(self, model_info_dict): + if self.model_cfg.get('BACKBONE_3D', None) is None: + return None, model_info_dict + backbone_3d_module = backbones_3d.__all__[self.model_cfg.BACKBONE_3D.NAME]( # Build the 3d backbone based with the reference to the modules imported in the backbone_3d module + model_cfg=self.model_cfg.BACKBONE_3D, + input_channels=model_info_dict['num_point_features'], + grid_size=model_info_dict['grid_size'], + voxel_size=model_info_dict['voxel_size'], + point_cloud_range=model_info_dict['point_cloud_range'] + ) + model_info_dict['module_list'].append(backbone_3d_module) + model_info_dict['num_point_features'] = backbone_3d_module.num_point_features + model_info_dict['backbone_channels'] = backbone_3d_module.backbone_channels \ + if hasattr(backbone_3d_module, 'backbone_channels') else None + return backbone_3d_module, model_info_dict + + def build_map_to_bev_module(self, model_info_dict): + if self.model_cfg.get('MAP_TO_BEV', None) is None: + return None, model_info_dict + + map_to_bev_module = map_to_bev.__all__[self.model_cfg.MAP_TO_BEV.NAME]( + model_cfg=self.model_cfg.MAP_TO_BEV, + grid_size=model_info_dict['grid_size'] + ) + model_info_dict['module_list'].append(map_to_bev_module) + model_info_dict['num_bev_features'] = map_to_bev_module.num_bev_features + return map_to_bev_module, model_info_dict + + def build_backbone_2d(self, model_info_dict): + if self.model_cfg.get('BACKBONE_2D', None) is None: + return None, model_info_dict + + backbone_2d_module = backbones_2d.__all__[self.model_cfg.BACKBONE_2D.NAME]( + model_cfg=self.model_cfg.BACKBONE_2D, + input_channels=model_info_dict.get('num_bev_features', None) + ) + model_info_dict['module_list'].append(backbone_2d_module) + model_info_dict['num_bev_features'] = backbone_2d_module.num_bev_features + return backbone_2d_module, model_info_dict + + def build_pfe(self, model_info_dict): + if self.model_cfg.get('PFE', None) is None: + return None, model_info_dict + + pfe_module = pfe.__all__[self.model_cfg.PFE.NAME]( + model_cfg=self.model_cfg.PFE, + voxel_size=model_info_dict['voxel_size'], + point_cloud_range=model_info_dict['point_cloud_range'], + num_bev_features=model_info_dict['num_bev_features'], + num_rawpoint_features=model_info_dict['num_rawpoint_features'] + ) + model_info_dict['module_list'].append(pfe_module) + model_info_dict['num_point_features'] = pfe_module.num_point_features + model_info_dict['num_point_features_before_fusion'] = pfe_module.num_point_features_before_fusion + return 
pfe_module, model_info_dict + + def build_dense_head(self, model_info_dict): + if self.model_cfg.get('DENSE_HEAD', None) is None: + return None, model_info_dict + dense_head_module = dense_heads.__all__[self.model_cfg.DENSE_HEAD.NAME]( + model_cfg=self.model_cfg.DENSE_HEAD, + input_channels=model_info_dict['num_bev_features'] if 'num_bev_features' in model_info_dict else self.model_cfg.DENSE_HEAD.INPUT_FEATURES, + num_class=self.num_class if not self.model_cfg.DENSE_HEAD.CLASS_AGNOSTIC else 1, + class_names=self.class_names, + grid_size=model_info_dict['grid_size'], + point_cloud_range=model_info_dict['point_cloud_range'], + predict_boxes_when_training=self.model_cfg.get('ROI_HEAD', False), + voxel_size=model_info_dict.get('voxel_size', False) + ) + model_info_dict['module_list'].append(dense_head_module) + return dense_head_module, model_info_dict + + def build_point_head(self, model_info_dict): + if self.model_cfg.get('POINT_HEAD', None) is None: + return None, model_info_dict + + if self.model_cfg.POINT_HEAD.get('USE_POINT_FEATURES_BEFORE_FUSION', False): + num_point_features = model_info_dict['num_point_features_before_fusion'] + else: + num_point_features = model_info_dict['num_point_features'] + + point_head_module = dense_heads.__all__[self.model_cfg.POINT_HEAD.NAME]( + model_cfg=self.model_cfg.POINT_HEAD, + input_channels=num_point_features, + num_class=self.num_class if not self.model_cfg.POINT_HEAD.CLASS_AGNOSTIC else 1, + predict_boxes_when_training=self.model_cfg.get('ROI_HEAD', False) + ) + + model_info_dict['module_list'].append(point_head_module) + return point_head_module, model_info_dict + + def build_roi_head(self, model_info_dict): + if self.model_cfg.get('ROI_HEAD', None) is None: + return None, model_info_dict + point_head_module = roi_heads.__all__[self.model_cfg.ROI_HEAD.NAME]( + model_cfg=self.model_cfg.ROI_HEAD, + input_channels=model_info_dict['num_point_features'], + backbone_channels= model_info_dict.get('backbone_channels', None), + point_cloud_range=model_info_dict['point_cloud_range'], + voxel_size=model_info_dict['voxel_size'], + num_class=self.num_class if not self.model_cfg.ROI_HEAD.CLASS_AGNOSTIC else 1, + ) + + model_info_dict['module_list'].append(point_head_module) + return point_head_module, model_info_dict + + def forward(self, **kwargs): + raise NotImplementedError + + def post_processing(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + batch_cls_preds: (B, num_boxes, num_classes | 1) or (N1+N2+..., num_classes | 1) + or [(B, num_boxes, num_class1), (B, num_boxes, num_class2) ...] + multihead_label_mapping: [(num_class1), (num_class2), ...] + batch_box_preds: (B, num_boxes, 7+C) or (N1+N2+..., 7+C) + cls_preds_normalized: indicate whether batch_cls_preds is normalized + batch_index: optional (N1+N2+...) + has_class_labels: True/False + roi_labels: (B, num_rois) 1 .. 
num_classes + batch_pred_labels: (B, num_boxes, 1) + Returns: + + """ + post_process_cfg = self.model_cfg.POST_PROCESSING + batch_size = batch_dict['batch_size'] + recall_dict = {} + pred_dicts = [] + for index in range(batch_size): + if batch_dict.get('batch_index', None) is not None: + assert batch_dict['batch_box_preds'].shape.__len__() == 2 + batch_mask = (batch_dict['batch_index'] == index) + else: + assert batch_dict['batch_box_preds'].shape.__len__() == 3 + batch_mask = index + + box_preds = batch_dict['batch_box_preds'][batch_mask] + src_box_preds = box_preds + + if not isinstance(batch_dict['batch_cls_preds'], list): + cls_preds = batch_dict['batch_cls_preds'][batch_mask] + + src_cls_preds = cls_preds + assert cls_preds.shape[1] in [1, self.num_class] + + if not batch_dict['cls_preds_normalized']: + cls_preds = torch.sigmoid(cls_preds) + else: + cls_preds = [x[batch_mask] for x in batch_dict['batch_cls_preds']] + src_cls_preds = cls_preds + if not batch_dict['cls_preds_normalized']: + cls_preds = [torch.sigmoid(x) for x in cls_preds] + + if post_process_cfg.NMS_CONFIG.MULTI_CLASSES_NMS: + if not isinstance(cls_preds, list): + cls_preds = [cls_preds] + multihead_label_mapping = [torch.arange(1, self.num_class, device=cls_preds[0].device)] + else: + multihead_label_mapping = batch_dict['multihead_label_mapping'] + + cur_start_idx = 0 + pred_scores, pred_labels, pred_boxes = [], [], [] + for cur_cls_preds, cur_label_mapping in zip(cls_preds, multihead_label_mapping): + assert cur_cls_preds.shape[1] == len(cur_label_mapping) + cur_box_preds = box_preds[cur_start_idx: cur_start_idx + cur_cls_preds.shape[0]] + cur_pred_scores, cur_pred_labels, cur_pred_boxes = model_nms_utils.multi_classes_nms( + cls_scores=cur_cls_preds, box_preds=cur_box_preds, + nms_config=post_process_cfg.NMS_CONFIG, + score_thresh=post_process_cfg.SCORE_THRESH + ) + cur_pred_labels = cur_label_mapping[cur_pred_labels] + pred_scores.append(cur_pred_scores) + pred_labels.append(cur_pred_labels) + pred_boxes.append(cur_pred_boxes) + cur_start_idx += cur_cls_preds.shape[0] + + final_scores = torch.cat(pred_scores, dim=0) + final_labels = torch.cat(pred_labels, dim=0) + final_boxes = torch.cat(pred_boxes, dim=0) + else: + cls_preds, label_preds = torch.max(cls_preds, dim=-1) + if batch_dict.get('has_class_labels', False): + label_key = 'roi_labels' if 'roi_labels' in batch_dict else 'batch_pred_labels' + label_preds = batch_dict[label_key][index] + else: + label_preds = label_preds + 1 + selected, selected_scores = model_nms_utils.class_agnostic_nms( + box_scores=cls_preds, box_preds=box_preds, + nms_config=post_process_cfg.NMS_CONFIG, + score_thresh=post_process_cfg.SCORE_THRESH + ) + + if post_process_cfg.OUTPUT_RAW_SCORE: + max_cls_preds, _ = torch.max(src_cls_preds, dim=-1) + selected_scores = max_cls_preds[selected] + + final_scores = selected_scores + final_labels = label_preds[selected] + final_boxes = box_preds[selected] + + recall_dict = self.generate_recall_record( + box_preds=final_boxes if 'rois' not in batch_dict else src_box_preds, + recall_dict=recall_dict, batch_index=index, data_dict=batch_dict, + thresh_list=post_process_cfg.RECALL_THRESH_LIST + ) + + record_dict = { + 'pred_boxes': final_boxes, + 'pred_scores': final_scores, + 'pred_labels': final_labels + } + pred_dicts.append(record_dict) + + return pred_dicts, recall_dict + + @staticmethod + def generate_recall_record(box_preds, recall_dict, batch_index, data_dict=None, thresh_list=None): + if 'gt_boxes' not in data_dict: + return recall_dict + + 
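+        # Descriptive note (added for clarity): recall is accumulated per IoU threshold for the
+        # final boxes ('rcnn_*') and, when first-stage proposals ('rois') are present in data_dict,
+        # also for those proposals ('roi_*'), counting ground-truth boxes (trailing all-zero rows
+        # excluded) whose best-matching prediction exceeds the threshold.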
rois = data_dict['rois'][batch_index] if 'rois' in data_dict else None + gt_boxes = data_dict['gt_boxes'][batch_index] + + if recall_dict.__len__() == 0: + recall_dict = {'gt': 0} + for cur_thresh in thresh_list: + recall_dict['roi_%s' % (str(cur_thresh))] = 0 + recall_dict['rcnn_%s' % (str(cur_thresh))] = 0 + + cur_gt = gt_boxes + k = cur_gt.__len__() - 1 + while k >= 0 and cur_gt[k].sum() == 0: + k -= 1 + cur_gt = cur_gt[:k + 1] + + if cur_gt.shape[0] > 0: + if box_preds.shape[0] > 0: + iou3d_rcnn = iou3d_nms_utils.boxes_iou3d_gpu(box_preds[:, 0:7], cur_gt[:, 0:7]) + else: + iou3d_rcnn = torch.zeros((0, cur_gt.shape[0])) + + if rois is not None: + iou3d_roi = iou3d_nms_utils.boxes_iou3d_gpu(rois[:, 0:7], cur_gt[:, 0:7]) + + for cur_thresh in thresh_list: + if iou3d_rcnn.shape[0] == 0: + recall_dict['rcnn_%s' % str(cur_thresh)] += 0 + else: + rcnn_recalled = (iou3d_rcnn.max(dim=0)[0] > cur_thresh).sum().item() + recall_dict['rcnn_%s' % str(cur_thresh)] += rcnn_recalled + if rois is not None: + roi_recalled = (iou3d_roi.max(dim=0)[0] > cur_thresh).sum().item() + recall_dict['roi_%s' % str(cur_thresh)] += roi_recalled + + recall_dict['gt'] += cur_gt.shape[0] + else: + gt_iou = box_preds.new_zeros(box_preds.shape[0]) + return recall_dict + + def _load_state_dict(self, model_state_disk, *, strict=True): + state_dict = self.state_dict() # local cache of state_dict + + spconv_keys = find_all_spconv_keys(self) + + update_model_state = {} + update_model_state = model_state_disk # If use TS, we convert weights ourself + + if strict: + print(self) + self.load_state_dict(update_model_state, strict=True ) + else: + state_dict.update(update_model_state) + self.load_state_dict(state_dict, strict=True) + return state_dict, update_model_state + + def load_params_from_file(self, filename, logger, to_cpu=False, pre_trained_path=None): + if not os.path.isfile(filename): + raise FileNotFoundError + + logger.info('==> Loading parameters from checkpoint %s to %s' % (filename, 'CPU' if to_cpu else 'GPU')) + loc_type = torch.device('cpu') if to_cpu else None + checkpoint = torch.load(filename, map_location=loc_type) + try: + model_state_disk = checkpoint['model_state'] # @Yingqi: sometimes we need to comment it off such as VoxelNeXt Argo, weird ['model_state'] + except: + model_state_disk = checkpoint # For argoverse2 VoxelNeXt model. 
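+        # Descriptive note (added for clarity): the try/except above handles checkpoints that store
+        # the weights at the top level (e.g. the Argoverse 2 VoxelNeXt release) instead of under the
+        # usual 'model_state' key.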
+ if not pre_trained_path is None: + pretrain_checkpoint = torch.load(pre_trained_path, map_location=loc_type) + pretrain_model_state_disk = pretrain_checkpoint['model_state'] + model_state_disk.update(pretrain_model_state_disk) + + version = checkpoint.get("version", None) + if version is not None: + logger.info('==> Checkpoint trained from version: %s' % version) + + state_dict, update_model_state = self._load_state_dict(model_state_disk, strict=True) # @Yingqi: Trun on strict loading mode + + for key in state_dict: + if key not in update_model_state: + logger.info('Not updated weight %s: %s' % (key, str(state_dict[key].shape))) + + logger.info('==> Done (loaded %d/%d)' % (len(update_model_state), len(state_dict))) + + def load_params_with_optimizer(self, filename, to_cpu=False, optimizer=None, logger=None): + if not os.path.isfile(filename): + raise FileNotFoundError + + logger.info('==> Loading parameters from checkpoint %s to %s' % (filename, 'CPU' if to_cpu else 'GPU')) + loc_type = torch.device('cpu') if to_cpu else None + checkpoint = torch.load(filename, map_location=loc_type) + epoch = checkpoint.get('epoch', -1) + it = checkpoint.get('it', 0.0) + + self._load_state_dict(checkpoint['model_state'], strict=True) + + if optimizer is not None: + if 'optimizer_state' in checkpoint and checkpoint['optimizer_state'] is not None: + logger.info('==> Loading optimizer parameters from checkpoint %s to %s' + % (filename, 'CPU' if to_cpu else 'GPU')) + optimizer.load_state_dict(checkpoint['optimizer_state']) + else: + assert filename[-4] == '.', filename + src_file, ext = filename[:-4], filename[-3:] + optimizer_filename = '%s_optim.%s' % (src_file, ext) + if os.path.exists(optimizer_filename): + optimizer_ckpt = torch.load(optimizer_filename, map_location=loc_type) + optimizer.load_state_dict(optimizer_ckpt['optimizer_state']) + + if 'version' in checkpoint: + print('==> Checkpoint trained from version: %s' % checkpoint['version']) + logger.info('==> Done') + + return it, epoch diff --git a/examples/openpcdet/pcdet_plugin/models/roi_heads/__init__.py b/examples/openpcdet/pcdet_plugin/models/roi_heads/__init__.py new file mode 100644 index 0000000..40b96b0 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/roi_heads/__init__.py @@ -0,0 +1 @@ +from .partA2_head import PartA2FCHeadTS \ No newline at end of file diff --git a/examples/openpcdet/pcdet_plugin/models/roi_heads/partA2_head.py b/examples/openpcdet/pcdet_plugin/models/roi_heads/partA2_head.py new file mode 100644 index 0000000..77aa1a1 --- /dev/null +++ b/examples/openpcdet/pcdet_plugin/models/roi_heads/partA2_head.py @@ -0,0 +1,252 @@ +import torch +import numpy as np +import torch.nn as nn +import torchsparse +import torchsparse.nn as spnn +from pcdet.models.roi_heads import RoIHeadTemplate +from pcdet.ops.roiaware_pool3d import roiaware_pool3d_utils + + +class PartA2FCHeadTS(RoIHeadTemplate): + def __init__(self, input_channels, model_cfg, num_class=1, **kwargs): + super().__init__(num_class=num_class, model_cfg=model_cfg) + self.model_cfg = model_cfg + + self.SA_modules = nn.ModuleList() + block = self.post_act_block_ts + + c0 = self.model_cfg.ROI_AWARE_POOL.NUM_FEATURES // 2 + self.conv_part = nn.Sequential( + block(4, 64, 3, padding=1), + block(64, c0, 3, padding=1), + ) + self.conv_rpn = nn.Sequential( + block(input_channels, 64, 3, padding=1), + block(64, c0, 3, padding=1), + ) + + shared_fc_list = [] + pool_size = self.model_cfg.ROI_AWARE_POOL.POOL_SIZE + pre_channel = self.model_cfg.ROI_AWARE_POOL.NUM_FEATURES * 
pool_size * pool_size * pool_size + for k in range(0, self.model_cfg.SHARED_FC.__len__()): + shared_fc_list.extend([ + nn.Conv1d(pre_channel, self.model_cfg.SHARED_FC[k], kernel_size=1, bias=False), + nn.BatchNorm1d(self.model_cfg.SHARED_FC[k]), + nn.ReLU() + ]) + pre_channel = self.model_cfg.SHARED_FC[k] + + if k != self.model_cfg.SHARED_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: + shared_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) + + self.shared_fc_layer = nn.Sequential(*shared_fc_list) + + self.cls_layers = self.make_fc_layers( + input_channels=pre_channel, output_channels=self.num_class, fc_list=self.model_cfg.CLS_FC + ) + self.reg_layers = self.make_fc_layers( + input_channels=pre_channel, + output_channels=self.box_coder.code_size * self.num_class, + fc_list=self.model_cfg.REG_FC + ) + + self.roiaware_pool3d_layer = roiaware_pool3d_utils.RoIAwarePool3d( + out_size=self.model_cfg.ROI_AWARE_POOL.POOL_SIZE, + max_pts_each_voxel=self.model_cfg.ROI_AWARE_POOL.MAX_POINTS_PER_VOXEL + ) + self.init_weights(weight_init='xavier') + + def init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + # def post_act_block(self, in_channels, out_channels, kernel_size, indice_key, stride=1, padding=0, conv_type='subm'): + # if conv_type == 'subm': + # m = spconv.SparseSequential( + # spconv.SubMConv3d(in_channels, out_channels, kernel_size, bias=False, indice_key=indice_key), + # nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), + # nn.ReLU(), + # ) + # elif conv_type == 'spconv': + # m = spconv.SparseSequential( + # spconv.SparseConv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, + # bias=False, indice_key=indice_key), + # nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), + # nn.ReLU(), + # ) + # elif conv_type == 'inverseconv': + # m = spconv.SparseSequential( + # spconv.SparseInverseConv3d(in_channels, out_channels, kernel_size, + # indice_key=indice_key, bias=False), + # nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), + # nn.ReLU(), + # ) + # else: + # raise NotImplementedError + # return m + + + def post_act_block_ts(self, in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0, + conv_type='tsconv', norm_fn=None): + if norm_fn is None: + norm_fn = spnn.BatchNorm(out_channels, eps=1e-3, momentum=0.01) + + if conv_type == 'tsconv': + conv = spnn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False) + elif conv_type == 'inverseconv': + conv = spnn.Conv3d(in_channels, out_channels, kernel_size, bias=False, transposed=True) + else: + raise NotImplementedError + + m = nn.Sequential( + conv, + norm_fn, + spnn.ReLU(), + ) + return m + + def roiaware_pool(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + rois: (B, num_rois, 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + + """ + batch_size = 
batch_dict['batch_size'] + batch_idx = batch_dict['point_coords'][:, 0] + point_coords = batch_dict['point_coords'][:, 1:4] + point_features = batch_dict['point_features'] + part_features = torch.cat(( + batch_dict['point_part_offset'] if not self.model_cfg.get('DISABLE_PART', False) else point_coords, + batch_dict['point_cls_scores'].view(-1, 1).detach() + ), dim=1) + part_features[part_features[:, -1] < self.model_cfg.SEG_MASK_SCORE_THRESH, 0:3] = 0 + + rois = batch_dict['rois'] + + pooled_part_features_list, pooled_rpn_features_list = [], [] + + for bs_idx in range(batch_size): + bs_mask = (batch_idx == bs_idx) + cur_point_coords = point_coords[bs_mask] + cur_part_features = part_features[bs_mask] + cur_rpn_features = point_features[bs_mask] + cur_roi = rois[bs_idx][:, 0:7].contiguous() # (N, 7) + + pooled_part_features = self.roiaware_pool3d_layer.forward( + cur_roi, cur_point_coords, cur_part_features, pool_method='avg' + ) # (N, out_x, out_y, out_z, 4) + pooled_rpn_features = self.roiaware_pool3d_layer.forward( + cur_roi, cur_point_coords, cur_rpn_features, pool_method='max' + ) # (N, out_x, out_y, out_z, C) + + pooled_part_features_list.append(pooled_part_features) + pooled_rpn_features_list.append(pooled_rpn_features) + + pooled_part_features = torch.cat(pooled_part_features_list, dim=0) # (B * N, out_x, out_y, out_z, 4) + pooled_rpn_features = torch.cat(pooled_rpn_features_list, dim=0) # (B * N, out_x, out_y, out_z, C) + + return pooled_part_features, pooled_rpn_features + + @staticmethod + def fake_sparse_idx(sparse_idx, batch_size_rcnn): + print('Warning: Sparse_Idx_Shape(%s) \r' % (str(sparse_idx.shape)), end='', flush=True) + # at most one sample is non-empty, then fake the first voxels of each sample(BN needs at least + # two values each channel) as non-empty for the below calculation + sparse_idx = sparse_idx.new_zeros((batch_size_rcnn, 3)) + bs_idxs = torch.arange(batch_size_rcnn).type_as(sparse_idx).view(-1, 1) + sparse_idx = torch.cat((bs_idxs, sparse_idx), dim=1) + return sparse_idx + + def forward(self, batch_dict): + """ + Args: + batch_dict: + + Returns: + + """ + targets_dict = self.proposal_layer( + batch_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] + ) + if self.training: + targets_dict = self.assign_targets(batch_dict) + batch_dict['rois'] = targets_dict['rois'] + batch_dict['roi_labels'] = targets_dict['roi_labels'] + + # RoI aware pooling + pooled_part_features, pooled_rpn_features = self.roiaware_pool(batch_dict) + batch_size_rcnn = pooled_part_features.shape[0] # (B * N, out_x, out_y, out_z, 4) + + # transform to sparse tensors + sparse_shape = np.array(pooled_part_features.shape[1:4], dtype=np.int32) + sparse_idx = pooled_part_features.sum(dim=-1).nonzero() # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx] + if sparse_idx.shape[0] < 3: + sparse_idx = self.fake_sparse_idx(sparse_idx, batch_size_rcnn) + if self.training: + # these are invalid samples + targets_dict['rcnn_cls_labels'].fill_(-1) + targets_dict['reg_valid_mask'].fill_(-1) + + part_features = pooled_part_features[sparse_idx[:, 0], sparse_idx[:, 1], sparse_idx[:, 2], sparse_idx[:, 3]] + rpn_features = pooled_rpn_features[sparse_idx[:, 0], sparse_idx[:, 1], sparse_idx[:, 2], sparse_idx[:, 3]] + coords = sparse_idx.int().contiguous() + # part_features = spconv.SparseConvTensor(part_features, coords, sparse_shape, batch_size_rcnn) + # rpn_features = spconv.SparseConvTensor(rpn_features, coords, sparse_shape, batch_size_rcnn) + part_features = 
torchsparse.SparseTensor(coords=coords, feats=part_features,)
+        rpn_features = torchsparse.SparseTensor(coords=coords, feats=rpn_features)
+
+        # forward rcnn network
+        x_part = self.conv_part(part_features)
+        x_rpn = self.conv_rpn(rpn_features)
+
+        merged_feature = torch.cat((x_rpn.feats, x_part.feats), dim=1)  # (N, C)
+        # shared_feature = spconv.SparseConvTensor(merged_feature, coords, sparse_shape, batch_size_rcnn)
+
+        # >>> Replacing the line shared_feature = ... >>>
+        shared_feature = torchsparse.SparseTensor(coords=coords, feats=merged_feature, spatial_range=(batch_size_rcnn, *sparse_shape))
+        shared_feature = shared_feature.dense()
+        N, D, H, W, C = shared_feature.shape
+        shared_feature = shared_feature.permute(0, 2, 3, 4, 1).contiguous().reshape(N, H, W, C*D).permute(0, 3, 1, 2).contiguous()
+        shared_feature = shared_feature.view(batch_size_rcnn, -1, 1)
+        shared_feature = self.shared_fc_layer(shared_feature)
+        # <<< Replacing the line shared_feature = ... <<<
+
+        rcnn_cls = self.cls_layers(shared_feature).transpose(1, 2).contiguous().squeeze(dim=1)  # (B, 1 or 2)
+        rcnn_reg = self.reg_layers(shared_feature).transpose(1, 2).contiguous().squeeze(dim=1)  # (B, C)
+
+        if not self.training:
+            batch_cls_preds, batch_box_preds = self.generate_predicted_boxes(
+                batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg
+            )
+            batch_dict['batch_cls_preds'] = batch_cls_preds
+            batch_dict['batch_box_preds'] = batch_box_preds
+            batch_dict['cls_preds_normalized'] = False
+        else:
+            targets_dict['rcnn_cls'] = rcnn_cls
+            targets_dict['rcnn_reg'] = rcnn_reg
+
+        self.forward_ret_dict = targets_dict
+        return batch_dict
diff --git a/examples/openpcdet/setup.py b/examples/openpcdet/setup.py
new file mode 100644
index 0000000..6d89069
--- /dev/null
+++ b/examples/openpcdet/setup.py
@@ -0,0 +1,45 @@
+from setuptools import setup, find_packages
+from jinja2 import Template
+import os
+
+config_template_paths = [
+    "./cfgs_templates/kitti_models/second_plugin.yaml",
+    "./cfgs_templates/kitti_models/PartA2_plugin.yaml",
+    "./cfgs_templates/kitti_models/pv_rcnn_plugin.yaml",
+    "./cfgs_templates/kitti_models/voxel_rcnn_car_plugin.yaml",
+    "./cfgs_templates/nuscenes_models/cbgs_voxel0075_voxelnext_mini.yaml"
+]
+
+os.makedirs("./cfgs", exist_ok=True)
+os.makedirs("./cfgs/kitti_models", exist_ok=True)
+os.makedirs("./cfgs/nuscenes_models", exist_ok=True)
+
+# define PCDET_BASE
+if os.environ.get("PCDET_BASE") is None:
+    # raise an exception asking the user to define the PCDET_BASE environment variable
+    raise ValueError("Please define the environment variable PCDET_BASE")
+else:
+    base = os.environ.get("PCDET_BASE")
+    print(f"PCDET_BASE: {base}")
+    for template_path in config_template_paths:
+        curr_template = Template(open(template_path).read())
+        curr_template_rendered = curr_template.render(pcdet_base_path=base)
+
+        file_name = os.path.basename(template_path)
+        folder_path = os.path.dirname(template_path)
+        folder_name = os.path.basename(folder_path)
+        output_file_path = os.path.join("./cfgs", folder_name, file_name)
+        with open(output_file_path, 'w') as file:
+            file.write(curr_template_rendered)
+
+
+
+
+setup(
+    name='pcdet_plugin',
+    version='0.1',
+    packages=find_packages(),
+)
+
+# TODO: define a global initializer for the torchsparse backend.
+# TODO: add an init function so pcdet traverses the modified folders, then reference those folders from the pcdet plugin.
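+
+# Usage sketch (illustrative; assumes an OpenPCDet checkout at /path/to/OpenPCDet and that
+# jinja2 is installed in the current environment). Running setup.py renders the templates in
+# ./cfgs_templates into ./cfgs and installs the pcdet_plugin package, e.g.:
+#   export PCDET_BASE=/path/to/OpenPCDet
+#   python setup.py develop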
diff --git a/torchsparse/tensor.py b/torchsparse/tensor.py index c1b19ce..256d89c 100644 --- a/torchsparse/tensor.py +++ b/torchsparse/tensor.py @@ -112,3 +112,35 @@ def __add__(self, other): ) output._caches = self._caches return output + +class PointTensor: + def __init__(self, feats, coords, idx_query=None, weights=None): + self.F = feats + self.C = coords + self.idx_query = idx_query if idx_query is not None else {} + self.weights = weights if weights is not None else {} + self.additional_features = {} + self.additional_features['idx_query'] = {} + self.additional_features['counts'] = {} + + def cuda(self): + self.F = self.F.cuda() + self.C = self.C.cuda() + return self + + def detach(self): + self.F = self.F.detach() + self.C = self.C.detach() + return self + + def to(self, device, non_blocking=True): + self.F = self.F.to(device, non_blocking=non_blocking) + self.C = self.C.to(device, non_blocking=non_blocking) + return self + + def __add__(self, other): + tensor = PointTensor(self.F + other.F, self.C, self.idx_query, + self.weights) + tensor.additional_features = self.additional_features + return tensor +
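+
+# Minimal usage sketch for PointTensor (illustrative only; the shapes and coordinate layout below
+# are assumptions of this example, not requirements imposed by the class):
+#   import torch
+#   from torchsparse.tensor import PointTensor
+#
+#   feats = torch.randn(1000, 4)            # per-point features
+#   coords = torch.rand(1000, 4)            # point coordinates (layout is up to the caller)
+#   pt = PointTensor(feats, coords).cuda()  # moves .F and .C to the GPU
+#   merged = pt + pt                        # adds features, reuses coords and cached queries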