8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -8,28 +8,28 @@ repos:
args: ['--in-place', '--remove-unused-variables', '--remove-all-unused-imports', '--ignore-init-module-imports']

- repo: https://github.com/pycqa/isort
rev: 5.13.2
rev: 6.0.1
hooks:
- id: isort
name: sort all imports (python)
args: ["--profile", "black"] # avoid conflict with black

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.10.0
rev: 25.1.0
hooks:
- id: black
name: black formatter
args: ['--line-length=120', '--target-version=py37', '--target-version=py38', '--target-version=py39','--target-version=py310']

- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v19.1.5
rev: v21.1.0
hooks:
- id: clang-format
name: clang formatter
types_or: [c++, c]

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
- id: check-yaml
- id: check-merge-conflict
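To pick up these bumped hook revisions locally, the usual flow is pre-commit install once per clone, then pre-commit run --all-files after the bump (pre-commit autoupdate is what typically produces revision bumps like these). The formatting-only changes in the files below are consistent with such a re-run, though the exact commands used for this PR are not shown.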
4 changes: 2 additions & 2 deletions applications/ColossalQA/examples/webui_demo/webui.py
@@ -81,11 +81,11 @@ def restart(chatbot, txt):
)
with gr.Row():
btn = gr.UploadButton("📁", file_types=["file"], file_count="multiple", size="sm")
restart_btn = gr.Button(str("\u21BB"), elem_id="restart-btn", scale=1)
restart_btn = gr.Button(str("\u21bb"), elem_id="restart-btn", scale=1)
txt = gr.Textbox(
scale=8,
show_label=False,
placeholder="Enter text and press enter, or use 📁 to upload files, click \u21BB to clear loaded files and restart chat",
placeholder="Enter text and press enter, or use 📁 to upload files, click \u21bb to clear loaded files and restart chat",
container=True,
autofocus=True,
)
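The only change here is the case of the hex digits in the unicode escape; \u21BB and \u21bb denote the same character, so the button label and placeholder text render identically. A quick check in plain Python (illustrative, not project code):

    # Hex digits in \u escapes are case-insensitive: both spell U+21BB (↻).
    assert "\u21BB" == "\u21bb" == "↻"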
4 changes: 2 additions & 2 deletions colossalai/auto_parallel/tensor_shard/solver/solver.py
@@ -1,6 +1,6 @@
"""This code is adapted from Alpa
https://github.com/alpa-projects/alpa/
with some changes. """
https://github.com/alpa-projects/alpa/
with some changes."""

import multiprocessing
import time
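These docstring edits only remove padding spaces just inside the triple quotes; the same pattern repeats in the device_mesh.py, modeling_openmoe.py, layernorm.py, GaLore, ChatGLM2, and DeBERTa-v2 hunks below. Apart from that whitespace the docstring text is unchanged, so module behavior is unaffected. This is consistent with the newer Black docstring style pulled in by the hook bump above, though the diff itself does not state which hook produced it.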
4 changes: 2 additions & 2 deletions colossalai/device/device_mesh.py
@@ -1,6 +1,6 @@
"""This code is adapted from Alpa
https://github.com/alpa-projects/alpa/
with some changes. """
https://github.com/alpa-projects/alpa/
with some changes."""

import operator
from dataclasses import dataclass
2 changes: 1 addition & 1 deletion colossalai/legacy/moe/openmoe/model/modeling_openmoe.py
@@ -17,7 +17,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch OpenMoE model."""
"""PyTorch OpenMoE model."""
import math
from typing import List, Optional, Tuple, Union

4 changes: 2 additions & 2 deletions colossalai/nn/layer/layernorm.py
@@ -1,6 +1,6 @@
"""This code is from NVIDIA apex:
https://github.com/NVIDIA/apex
with some changes. """
https://github.com/NVIDIA/apex
with some changes."""

import numbers

2 changes: 1 addition & 1 deletion colossalai/nn/optimizer/distributed_galore.py
@@ -1,4 +1,4 @@
""" adapted from https://github.com/jiaweizzhao/GaLore/blob/master/galore_torch/adamw8bit.py"""
"""adapted from https://github.com/jiaweizzhao/GaLore/blob/master/galore_torch/adamw8bit.py"""

import warnings
from collections import defaultdict
2 changes: 1 addition & 1 deletion colossalai/nn/optimizer/galore.py
@@ -1,4 +1,4 @@
""" adapted from https://github.com/jiaweizzhao/GaLore/blob/master/galore_torch/adamw8bit.py"""
"""adapted from https://github.com/jiaweizzhao/GaLore/blob/master/galore_torch/adamw8bit.py"""

import warnings
from typing import List
2 changes: 1 addition & 1 deletion colossalai/shardformer/modeling/chatglm2.py
@@ -1,4 +1,4 @@
""" PyTorch ChatGLM model. """
"""PyTorch ChatGLM model."""

from typing import List, Optional, Tuple

4 changes: 2 additions & 2 deletions examples/community/roberta/preprocessing/get_mask.py
@@ -34,8 +34,8 @@ def __init__(
self.do_whole_word_mask = do_whole_word_mask
self.max_predictions_per_seq = max_predictions_per_seq
self.vocab_words = list(tokenizer.vocab.keys())
self.rec = re.compile("[\u4E00-\u9FA5]")
self.whole_rec = re.compile("##[\u4E00-\u9FA5]")
self.rec = re.compile("[\u4e00-\u9fa5]")
self.whole_rec = re.compile("##[\u4e00-\u9fa5]")

self.mlm_p = 0.15
self.mlm_mask_p = 0.8
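As with the earlier escape change, lowercasing the hex digits does not alter the compiled pattern: [\u4e00-\u9fa5] is still the usual CJK Unified Ideographs range used to detect Chinese characters for whole-word masking. A small sanity check (illustrative only, not part of the codebase):

    import re

    rec_lower = re.compile("[\u4e00-\u9fa5]")
    rec_upper = re.compile("[\u4E00-\u9FA5]")
    assert rec_lower.pattern == rec_upper.pattern   # identical after escape decoding
    assert rec_lower.search("训练数据")               # matches Chinese text
    assert rec_lower.search("training data") is None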
10 changes: 5 additions & 5 deletions examples/community/roberta/preprocessing/mask.cpp
@@ -75,15 +75,15 @@ auto get_new_segment(
return new_segment;
}

bool startsWith(const std::string &s, const std::string &sub) {
bool startsWith(const std::string& s, const std::string& sub) {
return s.find(sub) == 0 ? true : false;
}

auto create_whole_masked_lm_predictions(
std::vector<std::string> &tokens,
const std::vector<std::string> &original_tokens,
const std::vector<std::string> &vocab_words,
std::map<std::string, int> &vocab, const int max_predictions_per_seq,
std::vector<std::string>& tokens,
const std::vector<std::string>& original_tokens,
const std::vector<std::string>& vocab_words,
std::map<std::string, int>& vocab, const int max_predictions_per_seq,
const double masked_lm_prob) {
// for (auto item : vocab) {
// std::cout << "key=" << std::string(py::str(item.first)) << ", "
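Here and in the C/C++/CUDA hunks that follow (cpu_adam_arm.cpp, cpu_adam_arm.h, vec_copy.h), the only change is moving the * or & from the variable name onto the type, e.g. const std::string &s becomes const std::string& s. That is a pointer-alignment formatting preference (clang-format's PointerAlignment: Left style); no signatures or behavior change. It presumably comes from the clang-format hook bump above, though the diff does not say so explicitly.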
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch DeBERTa-v2 model."""
"""PyTorch DeBERTa-v2 model."""

import math
from collections.abc import Sequence
18 changes: 9 additions & 9 deletions extensions/csrc/kernel/arm/cpu_adam_arm.cpp
@@ -1,7 +1,7 @@
#include "cpu_adam_arm.h"

void AdamOptimizer::Step_1(void *_params, void *grads, void *_exp_avg,
void *_exp_avg_sq, size_t _param_size,
void AdamOptimizer::Step_1(void* _params, void* grads, void* _exp_avg,
void* _exp_avg_sq, size_t _param_size,
at::ScalarType param_dtype,
at::ScalarType grad_dtype,
at::ScalarType exp_avg_dtype,
@@ -106,8 +106,8 @@ void AdamOptimizer::Step_1(void *_params, void *grads, void *_exp_avg,
}
}

void AdamOptimizer::Step_4(void *_params, void *grads, void *_exp_avg,
void *_exp_avg_sq, size_t _param_size,
void AdamOptimizer::Step_4(void* _params, void* grads, void* _exp_avg,
void* _exp_avg_sq, size_t _param_size,
at::ScalarType param_dtype,
at::ScalarType grad_dtype,
at::ScalarType exp_avg_dtype,
@@ -192,8 +192,8 @@ void AdamOptimizer::Step_4(void *_params, void *grads, void *_exp_avg,
}
}

void AdamOptimizer::Step_8(void *_params, void *grads, void *_exp_avg,
void *_exp_avg_sq, size_t _param_size,
void AdamOptimizer::Step_8(void* _params, void* grads, void* _exp_avg,
void* _exp_avg_sq, size_t _param_size,
at::ScalarType param_dtype,
at::ScalarType grad_dtype,
at::ScalarType exp_avg_dtype,
@@ -279,9 +279,9 @@ void AdamOptimizer::Step_8(void *_params, void *grads, void *_exp_avg,

void AdamOptimizer::step(size_t step, float lr, float beta1, float beta2,
float epsilon, float weight_decay,
bool bias_correction, torch::Tensor &params,
torch::Tensor &grads, torch::Tensor &exp_avg,
torch::Tensor &exp_avg_sq, float loss_scale) {
bool bias_correction, torch::Tensor& params,
torch::Tensor& grads, torch::Tensor& exp_avg,
torch::Tensor& exp_avg_sq, float loss_scale) {
auto params_c = params.contiguous();
auto grads_c = grads.contiguous();
auto exp_avg_c = exp_avg.contiguous();
44 changes: 22 additions & 22 deletions extensions/csrc/kernel/arm/cpu_adam_arm.h
@@ -11,15 +11,15 @@
#include <arm_neon.h>
#define SIMD_WIDTH 4

inline float32x4_t simd_load_offset(const void *ptr, at::ScalarType dtype,
inline float32x4_t simd_load_offset(const void* ptr, at::ScalarType dtype,
size_t offset) {
switch (dtype) {
case at::ScalarType::Float: {
auto ptr_f = reinterpret_cast<const float32_t *>(ptr);
auto ptr_f = reinterpret_cast<const float32_t*>(ptr);
return vld1q_f32(ptr_f + offset);
}
case at::ScalarType::Half: {
auto ptr_h = reinterpret_cast<const float16_t *>(ptr);
auto ptr_h = reinterpret_cast<const float16_t*>(ptr);
return vcvt_f32_f16(vld1_f16(ptr_h + offset));
}
// case at::ScalarType::BFloat16: {
@@ -31,20 +31,20 @@ inline float32x4_t simd_load_offset(const void *ptr, at::ScalarType dtype,
break;
}
}
inline float32x4_t simd_load(void const *ptr, at::ScalarType dtype) {
inline float32x4_t simd_load(void const* ptr, at::ScalarType dtype) {
return simd_load_offset(ptr, dtype, 0);
}

inline void simd_store_offset(void *ptr, at::ScalarType dtype, float32x4_t data,
inline void simd_store_offset(void* ptr, at::ScalarType dtype, float32x4_t data,
size_t offset) {
switch (dtype) {
case at::ScalarType::Float: {
auto ptr_f = reinterpret_cast<float32_t *>(ptr);
auto ptr_f = reinterpret_cast<float32_t*>(ptr);
vst1q_f32(ptr_f + offset, data);
break;
}
case at::ScalarType::Half: {
auto ptr_h = reinterpret_cast<float16_t *>(ptr);
auto ptr_h = reinterpret_cast<float16_t*>(ptr);
vst1_f16(ptr_h + offset, vcvt_f16_f32(data));
break;
}
@@ -59,7 +59,7 @@ inline void simd_store_offset(void *ptr, at::ScalarType dtype, float32x4_t data,
}
}

inline void simd_store(void *ptr, at::ScalarType dtype, float32x4_t data) {
inline void simd_store(void* ptr, at::ScalarType dtype, float32x4_t data) {
return simd_store_offset(ptr, dtype, data, 0);
}

@@ -70,14 +70,14 @@ inline float32x4_t simd_set(float value) {

#endif

inline float scalar_load_offset(const void *ptr, at::ScalarType dtype,
inline float scalar_load_offset(const void* ptr, at::ScalarType dtype,
size_t offset) {
switch (dtype) {
case at::ScalarType::Float:
return *(reinterpret_cast<const float *>(ptr) + offset);
return *(reinterpret_cast<const float*>(ptr) + offset);
case at::ScalarType::Half:
return static_cast<float>(
*(reinterpret_cast<const at::Half *>(ptr) + offset));
*(reinterpret_cast<const at::Half*>(ptr) + offset));
// case at::ScalarType::BFloat16:
// return static_cast<float>(
// *(reinterpret_cast<const at::BFloat16 *>(ptr) + offset));
@@ -87,14 +87,14 @@ inline float scalar_load_offset(const void *ptr, at::ScalarType dtype,
}
}

inline void scalar_store_offset(void *ptr, at::ScalarType dtype, float data,
inline void scalar_store_offset(void* ptr, at::ScalarType dtype, float data,
size_t offset) {
switch (dtype) {
case at::ScalarType::Float:
*(reinterpret_cast<float *>(ptr) + offset) = data;
*(reinterpret_cast<float*>(ptr) + offset) = data;
break;
case at::ScalarType::Half:
*(reinterpret_cast<at::Half *>(ptr) + offset) = data;
*(reinterpret_cast<at::Half*>(ptr) + offset) = data;
break;
// case at::ScalarType::BFloat16:
// *(reinterpret_cast<at::BFloat16 *>(ptr) + offset) = data;
@@ -105,13 +105,13 @@ inline void scalar_store_offset(void *ptr, at::ScalarType dtype, float data,
}
}

inline void *scalar_seek_offset(void *ptr, at::ScalarType dtype,
inline void* scalar_seek_offset(void* ptr, at::ScalarType dtype,
size_t offset) {
switch (dtype) {
case at::ScalarType::Float:
return reinterpret_cast<float *>(ptr) + offset;
return reinterpret_cast<float*>(ptr) + offset;
case at::ScalarType::Half:
return reinterpret_cast<at::Half *>(ptr) + offset;
return reinterpret_cast<at::Half*>(ptr) + offset;
// case at::ScalarType::BFloat16:
// return reinterpret_cast<at::BFloat16 *>(ptr) + offset;
default:
@@ -120,8 +120,8 @@ inline void *scalar_seek_offset(void *ptr, at::ScalarType dtype,
}
}
#define STEP(SPAN) \
void Step_##SPAN(void *_params, void *grads, void *_exp_avg, \
void *_exp_avg_sq, size_t _param_size, \
void Step_##SPAN(void* _params, void* grads, void* _exp_avg, \
void* _exp_avg_sq, size_t _param_size, \
at::ScalarType param_dtype, at::ScalarType grad_dtype, \
at::ScalarType exp_avg_dtype, \
at::ScalarType exp_avg_sq_dtype, float loss_scale = -1);
@@ -195,7 +195,7 @@ class AdamOptimizer {
}

void step(size_t step, float lr, float beta1, float beta2, float epsilon,
float weight_decay, bool bias_correction, torch::Tensor &params,
torch::Tensor &grads, torch::Tensor &exp_avg,
torch::Tensor &exp_avg_sq, float loss_scale);
float weight_decay, bool bias_correction, torch::Tensor& params,
torch::Tensor& grads, torch::Tensor& exp_avg,
torch::Tensor& exp_avg_sq, float loss_scale);
};
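For orientation, the Step_1/Step_4/Step_8 declarations above and the step() entry point implement a fused Adam update over 1, 4, or 8 elements per inner iteration, with the NEON path handling 4 floats per vector. Below is a minimal Python sketch of the per-element math such kernels typically vectorize, written against the standard Adam formulation. It is not this extension's API; in particular, whether weight decay is folded into the gradient or applied decoupled (AdamW-style) depends on optimizer settings that are not visible in this diff.

    def adam_step_reference(p, g, m, v, step, lr, beta1, beta2, eps,
                            weight_decay, bias_correction, loss_scale=-1.0):
        """One scalar Adam update; the C++ kernels apply the same math
        across SIMD lanes (SIMD_WIDTH = 4 floats per NEON vector)."""
        if loss_scale > 0:
            g = g / loss_scale                 # undo loss scaling on the gradient
        if weight_decay > 0:
            g = g + weight_decay * p           # L2-style decay folded into the gradient
        m = beta1 * m + (1.0 - beta1) * g      # first moment (exp_avg)
        v = beta2 * v + (1.0 - beta2) * g * g  # second moment (exp_avg_sq)
        if bias_correction:
            m_hat = m / (1.0 - beta1 ** step)
            v_hat = v / (1.0 - beta2 ** step)
        else:
            m_hat, v_hat = m, v
        p = p - lr * m_hat / (v_hat ** 0.5 + eps)
        return p, m, v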
24 changes: 12 additions & 12 deletions extensions/csrc/kernel/cuda/utils/vec_copy.h
@@ -9,36 +9,36 @@ namespace cuda {
namespace utils {

template <typename T, int VecSize>
__device__ __inline__ void copy_zero(T *dst) {
__device__ __inline__ void copy_zero(T* dst) {
using VT = typename common::VecTypeTrait<T, VecSize>::Type;
*(reinterpret_cast<VT *>(dst)) = funcs::CastFunctor<float, VT>()(0.0f);
*(reinterpret_cast<VT*>(dst)) = funcs::CastFunctor<float, VT>()(0.0f);
}

template <typename SrcT, typename DstT, int VecSize>
__device__ __inline__ void copy(const SrcT *src, DstT *dst) {
__device__ __inline__ void copy(const SrcT* src, DstT* dst) {
using SrcVT = typename common::VecTypeTrait<SrcT, VecSize>::Type;
using DstVT = typename common::VecTypeTrait<DstT, VecSize>::Type;
*(reinterpret_cast<DstVT *>(dst)) = funcs::CastFunctor<SrcVT, DstVT>()(
*(reinterpret_cast<const SrcVT *>(src)));
*(reinterpret_cast<DstVT*>(dst)) = funcs::CastFunctor<SrcVT, DstVT>()(
*(reinterpret_cast<const SrcVT*>(src)));
}

template <typename T, int VecSize>
__device__ __inline__ void copy(const T *src, T *dst) {
__device__ __inline__ void copy(const T* src, T* dst) {
using VT = typename common::VecTypeTrait<T, VecSize>::Type;
*(reinterpret_cast<VT *>(dst)) = *(reinterpret_cast<const VT *>(src));
*(reinterpret_cast<VT*>(dst)) = *(reinterpret_cast<const VT*>(src));
}

template <>
__device__ __inline__ void copy<float, float, 8>(const float *src, float *dst) {
__device__ __inline__ void copy<float, float, 8>(const float* src, float* dst) {
// Since the maximum memory alignment length is 128 bits, we choose float4
// here.
*(reinterpret_cast<float4 *>(dst)) = *(reinterpret_cast<const float4 *>(src));
*(reinterpret_cast<float4 *>(dst + 4)) =
*(reinterpret_cast<const float4 *>(src + 4));
*(reinterpret_cast<float4*>(dst)) = *(reinterpret_cast<const float4*>(src));
*(reinterpret_cast<float4*>(dst + 4)) =
*(reinterpret_cast<const float4*>(src + 4));
}

template <typename T>
int get_vec_size(const torch::Tensor &tensor) {
int get_vec_size(const torch::Tensor& tensor) {
uint64_t address = reinterpret_cast<uint64_t>(tensor.data_ptr());
const int max_aligned_size = 128;
const int dtype_size = sizeof(T) * 8;