Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions aiter/fused_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from aiter import get_hip_quant as get_quant
from aiter import logger
from aiter.jit.core import (
AITER_CONFIG_FMOE_FILE,
AITER_CONFIGS,
PY,
bd_dir,
get_asm_dir,
Expand Down Expand Up @@ -573,8 +573,8 @@ def get_cfg_2stages(tune_file):
return cfg_2stages

global cfg_2stages
config_path = os.path.dirname(AITER_CONFIG_FMOE_FILE)
tune_file = AITER_CONFIG_FMOE_FILE
config_path = os.path.dirname(AITER_CONFIGS.AITER_CONFIG_FMOE_FILE)
tune_file = AITER_CONFIGS.AITER_CONFIG_FMOE_FILE
untune_file = os.path.join(config_path, "untuned_fmoe.csv")
profile_file = os.path.join(config_path, "profile_fmoe.csv")
if cfg_2stages is None:
Expand Down
275 changes: 157 additions & 118 deletions aiter/jit/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,81 +66,6 @@ def mp_lock(


# config_env start here
def update_config_files(file_path: str, merge_name: str):
    """Merge several tuned-config CSV files into one and return its path.

    ``file_path`` may hold a single path or several paths joined with
    ``os.pathsep`` (example: AITER_CONFIG_GEMM_A4W4="/path1:/path2").
    With zero or one entry nothing is read or written and ``file_path`` is
    returned unchanged.

    With several entries the first file is always read; each later entry is
    read only if it exists and must have exactly the same columns as the
    first. The rows are concatenated and de-duplicated:

    * if ``{AITER_ROOT_DIR}/aiter/configs/<untuned name>.csv`` exists, its
      column names plus ``"cu_num"`` form the duplicate key and, per key,
      the row with the smallest ``"us"`` value is kept;
    * otherwise only rows that are identical in every column are collapsed.

    The merged table is written to ``/tmp/{merge_name}.csv``.

    Args:
        file_path: One path, or several joined with ``os.pathsep``.
        merge_name: Base name (no extension) of the merged file; also used
            to derive the name of the matching "untuned" CSV.

    Returns:
        ``file_path`` itself when it names at most one file, otherwise the
        path of the merged CSV.

    Raises:
        FileNotFoundError: From ``pandas.read_csv`` when the first listed
            file does not exist (only later entries are allowed to be
            missing).
        AssertionError: When a later file's columns differ from the first.
    """
    path_list = file_path.split(os.pathsep) if file_path else []
    if len(path_list) <= 1:
        return file_path

    # Imported lazily so the single-file fast path above never needs pandas.
    import pandas as pd

    primary_path = path_list[0]
    primary_df = pd.read_csv(primary_path)
    expected_columns = primary_df.columns.tolist()
    df_list = [primary_df]
    for index, path in enumerate(path_list[1:], start=1):
        if not os.path.exists(path):
            logger.info(f"path {index}: {path} (not exist)")
            continue
        df = pd.read_csv(path)
        ## check columns
        assert (
            df.columns.tolist() == expected_columns
        ), f"Column mismatch between {primary_path} and {path}, {expected_columns}, {df.columns.tolist()}"
        df_list.append(df)
    merge_df = pd.concat(df_list, ignore_index=True)

    ## get keys from untuned file to drop_duplicates
    # The previous re.sub(r"(?:_)?tuned$", r"\1untuned", ...) referenced a
    # group the pattern does not define, which makes re.sub raise re.error.
    # Swapping the trailing "tuned" for "untuned" is the intended result.
    if merge_name.endswith("tuned"):
        untuned_name = merge_name[: -len("tuned")] + "untuned"
    else:
        untuned_name = merge_name.replace("tuned", "untuned")

    untuned_path = f"{AITER_ROOT_DIR}/aiter/configs/{untuned_name}.csv"
    if os.path.exists(untuned_path):
        keys = pd.read_csv(untuned_path).columns.to_list()
        if "cu_num" not in keys:
            keys.append("cu_num")
        # Ascending sort on "us" + keep="first" keeps the lowest "us" per key.
        merge_df = (
            merge_df.sort_values("us")
            .drop_duplicates(subset=keys, keep="first")
            .reset_index(drop=True)
        )
    else:
        logger.warning(
            f"Untuned config file not found: {untuned_path}. Using all columns for deduplication."
        )
        # Do what the warning announces: collapse rows equal in every column.
        merge_df = merge_df.drop_duplicates().reset_index(drop=True)

    new_file_path = f"/tmp/{merge_name}.csv"
    merge_df.to_csv(new_file_path, index=False)
    return new_file_path


def get_config_file(env_name, default_file, tuned_file_name):
    """Resolve the tuned-config CSV to use for one operator.

    If the environment variable ``env_name`` is set and non-empty, its value
    is handed to ``update_config_files`` and that result is returned.

    Otherwise ``{AITER_ROOT_DIR}/aiter/configs/model_configs/`` is searched
    for regular files whose name contains ``tuned_file_name`` but not
    ``"untuned"``. When none are found ``default_file`` is returned as is;
    when some are found they are appended to ``default_file`` (joined with
    ``os.pathsep``) and the list is passed to ``update_config_files``.

    Args:
        env_name: Name of the environment variable that overrides the lookup.
        default_file: Path of the default tuned CSV.
        tuned_file_name: Base name of the tuned CSV, used both as the glob
            fragment and as the merge name.

    Returns:
        Path of the config file to use, as returned by
        ``update_config_files`` or ``default_file`` itself.
    """
    from pathlib import Path

    config_env_file = os.getenv(env_name)
    if config_env_file:
        return update_config_files(config_env_file, tuned_file_name)

    model_config_dir = Path(f"{AITER_ROOT_DIR}/aiter/configs/model_configs/")
    # Filter on the file name only: testing the full path would hide every
    # file whenever a parent directory happens to contain "untuned".
    # sorted() makes the merge input order independent of directory order.
    op_tuned_file_list = sorted(
        p
        for p in model_config_dir.glob(f"*{tuned_file_name}*")
        if p.is_file() and "untuned" not in p.name
    )
    if not op_tuned_file_list:
        return default_file

    # update_config_files() splits on os.pathsep, so join with the same
    # separator instead of a hard-coded ":".
    tuned_files = os.pathsep.join(
        [default_file, *(str(p) for p in op_tuned_file_list)]
    )
    logger.info(f"merge tuned file under model_configs/ and configs/ {tuned_files}")
    return update_config_files(tuned_files, tuned_file_name)


AITER_CONFIG_GEMM_A4W4 = os.getenv(
"AITER_CONFIG_GEMM_A4W4",
f"{AITER_ROOT_DIR}/aiter/configs/a4w4_blockscale_tuned_gemm.csv",
Expand Down Expand Up @@ -185,56 +110,170 @@ def get_config_file(env_name, default_file, tuned_file_name):

AITER_CONFIG_GEMM_BF16 = os.getenv(
"AITER_CONFIG_GEMM_BF16",
f"{AITER_ROOT_DIR}/aiter/configs/tuned_gemm.csv",
)
AITER_CONFIG_GEMM_A4W4_FILE = get_config_file(
"AITER_CONFIG_GEMM_A4W4", AITER_CONFIG_GEMM_A4W4, "a4w4_blockscale_tuned_gemm"
f"{AITER_ROOT_DIR}/aiter/configs/bf16_tuned_gemm.csv",
)

AITER_CONFIG_GEMM_A8W8_FILE = get_config_file(
"AITER_CONFIG_GEMM_A8W8", AITER_CONFIG_GEMM_A8W8, "a8w8_tuned_gemm"
)
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_FILE = get_config_file(
"AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE",
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE,
"a8w8_bpreshuffle_tuned_gemm",
)
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE_FILE = get_config_file(
"AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE",
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE,
"a8w8_bpreshuffle_cktile_tuned_gemm",
)
AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_FILE = get_config_file(
"AITER_CONFIG_GEMM_A8W8_BLOCKSCALE",
AITER_CONFIG_GEMM_A8W8_BLOCKSCALE,
"a8w8_blockscale_tuned_gemm",
)
AITER_CONFIG_FMOE_FILE = get_config_file(
"AITER_CONFIG_FMOE", AITER_CONFIG_FMOE, "tuned_fmoe"
)

AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE_FILE = get_config_file(
"AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE",
AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE,
"a8w8_blockscale_bpreshuffle_tuned_gemm",
)
class AITER_CONFIG(object):
@property
def AITER_CONFIG_GEMM_A4W4_FILE(self):
return self.get_config_file(
"AITER_CONFIG_GEMM_A4W4",
AITER_CONFIG_GEMM_A4W4,
"a4w4_blockscale_tuned_gemm",
)

AITER_CONFIG_A8W8_BATCHED_GEMM_FILE = get_config_file(
"AITER_CONFIG_A8W8_BATCHED_GEMM",
AITER_CONFIG_A8W8_BATCHED_GEMM,
"a8w8_tuned_batched_gemm",
)
@property
def AITER_CONFIG_GEMM_A8W8_FILE(self):
return self.get_config_file(
"AITER_CONFIG_GEMM_A8W8", AITER_CONFIG_GEMM_A8W8, "a8w8_tuned_gemm"
)

AITER_CONFIG_BF16_BATCHED_GEMM_FILE = get_config_file(
"AITER_CONFIG_BF16_BATCHED_GEMM",
AITER_CONFIG_BF16_BATCHED_GEMM,
"bf16_tuned_batched_gemm",
)
@property
def AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_FILE(self):
return self.get_config_file(
"AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE",
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE,
"a8w8_bpreshuffle_tuned_gemm",
)

@property
def AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_FILE(self):
return self.get_config_file(
"AITER_CONFIG_GEMM_A8W8_BLOCKSCALE",
AITER_CONFIG_GEMM_A8W8_BLOCKSCALE,
"a8w8_blockscale_tuned_gemm",
)

@property
def AITER_CONFIG_FMOE_FILE(self):
return self.get_config_file(
"AITER_CONFIG_FMOE", AITER_CONFIG_FMOE, "tuned_fmoe"
)

@property
def AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE_FILE(self):
return self.get_config_file(
"AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE",
AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE,
"a8w8_blockscale_bpreshuffle_tuned_gemm",
)

@property
def AITER_CONFIG_A8W8_BATCHED_GEMM_FILE(self):
return self.get_config_file(
"AITER_CONFIG_A8W8_BATCHED_GEMM",
AITER_CONFIG_A8W8_BATCHED_GEMM,
"a8w8_tuned_batched_gemm",
)

@property
def AITER_CONFIG_BF16_BATCHED_GEMM_FILE(self):
return self.get_config_file(
"AITER_CONFIG_BF16_BATCHED_GEMM",
AITER_CONFIG_BF16_BATCHED_GEMM,
"bf16_tuned_batched_gemm",
)

@property
def AITER_CONFIG_GEMM_BF16_FILE(self):
return self.get_config_file(
"AITER_CONFIG_GEMM_BF16", AITER_CONFIG_GEMM_BF16, "bf16_tuned_gemm"
)

@property
def AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE_FILE(self):
return self.get_config_file(
"AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE",
AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE,
"a8w8_bpreshuffle_cktile_tuned_gemm",
)

def update_config_files(self, file_path: str, merge_name: str):
path_list = file_path.split(os.pathsep) if file_path else []
if len(path_list) <= 1:
return file_path
df_list = []
## merge config files
##example: AITER_CONFIG_GEMM_A4W4="/path1:/path2"
import pandas as pd

df_list.append(pd.read_csv(path_list[0]))
Copy link

Copilot AI Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first file in path_list[0] is read without checking if it exists. If the first path doesn't exist, this will raise a FileNotFoundError. Consider adding an existence check before reading:

if os.path.exists(path_list[0]):
    df_list.append(pd.read_csv(path_list[0]))
else:
    logger.error(f"Primary config file not found: {path_list[0]}")
    return file_path
Suggested change
df_list.append(pd.read_csv(path_list[0]))
if os.path.exists(path_list[0]):
df_list.append(pd.read_csv(path_list[0]))
else:
logger.error(f"Primary config file not found: {path_list[0]}")
return file_path

Copilot uses AI. Check for mistakes.
for i, path in enumerate(path_list[1:]):
if os.path.exists(path):
df = pd.read_csv(path)
## check columns
assert (
df.columns.tolist() == df_list[0].columns.tolist()
), f"Column mismatch between {path_list[0]} and {path}, {df_list[0].columns.tolist()}, {df.columns.tolist()}"

df_list.append(df)
else:
logger.info(f"path {i+1}: {path} (not exist)")
merge_df = pd.concat(df_list, ignore_index=True) if df_list else pd.DataFrame()
## get keys from untuned file to drop_duplicates
untuned_name = (
re.sub(r"(?:_)?tuned$", r"\1untuned", merge_name)
Copy link

Copilot AI Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex replacement r"\1untuned" refers to capture group 1, but the pattern r"(?:_)?tuned$" uses a non-capturing group (?:_)?. This means there is no group 1 to reference, so whenever merge_name actually ends in "tuned" and this re.sub call runs, Python raises re.error ("invalid group reference 1") instead of producing the untuned name. The pattern should use a capturing group instead:

untuned_name = re.sub(r"(_)?tuned$", r"\1untuned", merge_name)

Remove the ?: to make it a capturing group.

Suggested change
re.sub(r"(?:_)?tuned$", r"\1untuned", merge_name)
re.sub(r"(_)?tuned$", r"\1untuned", merge_name)

Copilot uses AI. Check for mistakes.
if re.search(r"(?:_)?tuned$", merge_name)
else merge_name.replace("tuned", "untuned")
)
untuned_path = f"{AITER_ROOT_DIR}/aiter/configs/{untuned_name}.csv"
if os.path.exists(untuned_path):
untunedf = pd.read_csv(untuned_path)
keys = untunedf.columns
merge_df = (
merge_df.sort_values("us")
.drop_duplicates(subset=keys, keep="first")
.reset_index(drop=True)
)
Comment on lines +224 to +228
Copy link

Copilot AI Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sort_values("us") call assumes that all config files have a "us" column. If any of the merged dataframes don't have this column (e.g., if a user provides a custom config file), this will raise a KeyError. Consider adding error handling or checking for column existence before sorting:

if "us" in merge_df.columns:
    merge_df = merge_df.sort_values("us").drop_duplicates(subset=keys, keep="first").reset_index(drop=True)
else:
    merge_df = merge_df.drop_duplicates(subset=keys, keep="first").reset_index(drop=True)
Suggested change
merge_df = (
merge_df.sort_values("us")
.drop_duplicates(subset=keys, keep="first")
.reset_index(drop=True)
)
if "us" in merge_df.columns:
merge_df = (
merge_df.sort_values("us")
.drop_duplicates(subset=keys, keep="first")
.reset_index(drop=True)
)
else:
merge_df = (
merge_df.drop_duplicates(subset=keys, keep="first")
.reset_index(drop=True)
)

Copilot uses AI. Check for mistakes.
else:
logger.warning(
f"Untuned config file not found: {untuned_path}. Using all columns for deduplication."
)
from pathlib import Path

config_path = Path("/tmp/aiter_configs/")
if not config_path.exists():
config_path.mkdir(parents=True, exist_ok=True)
new_file_path = f"{config_path}/{merge_name}.csv"
lock_path = f"{new_file_path}.lock"

def write_config():
merge_df.to_csv(new_file_path, index=False)

mp_lock(lock_path, write_config)
return new_file_path

@functools.lru_cache(maxsize=20)
Copy link

Copilot AI Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The get_config_file method reads environment variables via os.getenv(), but because it is wrapped in lru_cache that lookup only runs on the first call for a given set of arguments; every later property access returns the cached result. This creates an inconsistency where environment variable changes won't be reflected after the first access. If an environment variable is set or modified after the first call, the cached result will be returned, ignoring the new value. This contradicts the dynamic nature of environment variables.

Suggested change
@functools.lru_cache(maxsize=20)

Copilot uses AI. Check for mistakes.
def get_config_file(self, env_name, default_file, tuned_file_name):
Comment on lines +247 to +248
Copy link

Copilot AI Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lru_cache on get_config_file doesn't account for changes to environment variables or config files after the first call. The cache key is based on (env_name, default_file, tuned_file_name), but if the environment variable value changes at runtime or if config files are modified, the cache will still return stale results. The PR description mentions "regenerate when the file changed" but there's no file change detection mechanism implemented. Consider either:

  1. Removing the cache if runtime changes are expected
  2. Adding file modification time checks to the cache key
  3. Implementing a cache invalidation mechanism

Copilot uses AI. Check for mistakes.
config_env_file = os.getenv(env_name)
# default_file = f"{AITER_ROOT_DIR}/aiter/configs/{tuned_file_name}.csv"
from pathlib import Path

if not config_env_file:
model_config_dir = Path(f"{AITER_ROOT_DIR}/aiter/configs/model_configs/")
op_tuned_file_list = [
p
for p in model_config_dir.glob(f"*{tuned_file_name}*")
Copy link

Copilot AI Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The glob pattern f"*{tuned_file_name}*" could match unintended files. For example, if tuned_file_name is "a8w8_tuned_gemm", it would match files like "a8w8_tuned_gemm_backup.csv" or "old_a8w8_tuned_gemm.csv". Consider using a more specific pattern or exact filename matching to avoid accidentally including unrelated files in the merge.

Suggested change
for p in model_config_dir.glob(f"*{tuned_file_name}*")
for p in model_config_dir.glob(f"{tuned_file_name}.csv")

Copilot uses AI. Check for mistakes.
if (p.is_file() and "untuned" not in str(p))
]

if not op_tuned_file_list:
config_file = default_file
else:
tuned_files = ":".join(str(p) for p in op_tuned_file_list)
tuned_files = default_file + ":" + tuned_files
logger.info(
f"merge tuned file under model_configs/ and configs/ {tuned_files}"
)
config_file = self.update_config_files(tuned_files, tuned_file_name)
else:
config_file = self.update_config_files(config_env_file, tuned_file_name)
# print(f"get config file from environment ", config_file)
return config_file

AITER_CONFIG_GEMM_BF16_FILE = get_config_file(
"AITER_CONFIG_GEMM_BF16", AITER_CONFIG_GEMM_BF16, "bf16_tuned_gemm"
)

AITER_CONFIGS = AITER_CONFIG()
# config_env end here

find_aiter = importlib.util.find_spec("aiter")
Expand Down
16 changes: 8 additions & 8 deletions aiter/jit/optCompilerConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@
"extra_include": [
"f'{AITER_CSRC_DIR}/ck_batched_gemm_bf16/include'"
],
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_batched_gemm_bf16/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_BF16_BATCHED_GEMM_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_batched_gemm_bf16/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_BF16_BATCHED_GEMM_FILE}'"
},
"module_batched_gemm_a8w8": {
"srcs": [
Expand All @@ -198,7 +198,7 @@
"extra_include": [
"f'{AITER_CSRC_DIR}/ck_batched_gemm_a8w8/include'"
],
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_batched_gemm_a8w8/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_A8W8_BATCHED_GEMM_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_batched_gemm_a8w8/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_A8W8_BATCHED_GEMM_FILE}'"
},
"module_gemm_a8w8": {
"srcs": [
Expand All @@ -208,7 +208,7 @@
"extra_include": [
"f'{AITER_CSRC_DIR}/ck_gemm_a8w8/include'"
],
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_GEMM_A8W8_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_GEMM_A8W8_FILE}'"
},
"module_gemm_a8w8_blockscale": {
"srcs": [
Expand All @@ -223,7 +223,7 @@
"'-mllvm -greedy-reverse-local-assignment=1'",
"'-mllvm --amdgpu-use-amdgpu-trackers=1'"
],
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8_blockscale/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8_blockscale/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_FILE}'"
},
"module_gemm_a8w8_blockscale_bpreshuffle": {
"srcs": [
Expand All @@ -238,7 +238,7 @@
"'-mllvm -greedy-reverse-local-assignment=1'",
"'-mllvm --amdgpu-use-amdgpu-trackers=1'"
],
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8_blockscale_bpreshuffle/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8_blockscale_bpreshuffle/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_GEMM_A8W8_BLOCKSCALE_BPRESHUFFLE_FILE}'"
},
"module_gemm_a4w4_blockscale": {
"srcs": [
Expand All @@ -254,7 +254,7 @@
"'-mllvm --amdgpu-use-amdgpu-trackers=1'"
],
"hip_clang_path": "os.environ.get('GEMM_A4W4_BLOCKWISE_HIP_CLANG_PATH')",
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a4w4_blockscale/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_GEMM_A4W4_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a4w4_blockscale/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_GEMM_A4W4_FILE}'"
},
"module_gemm_a8w8_bpreshuffle": {
"srcs": [
Expand All @@ -267,7 +267,7 @@
],
"is_python_module": "True",
"is_standalone": "False",
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8_bpreshuffle/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/ck_gemm_a8w8_bpreshuffle/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_FILE}'"
},
"module_deepgemm": {
"srcs": [
Expand Down Expand Up @@ -305,7 +305,7 @@
"is_standalone": "False",
"verbose": "False",
"hip_clang_path": "os.environ.get('FLATMM_HIP_CLANG_PATH')",
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE_FILE}'"
"blob_gen_cmd": "f'{AITER_CSRC_DIR}/cktile_gemm_a8w8_bpreshuffle/gen_instances.py --working_path {{}} --tune_file {AITER_CONFIGS.AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_CKTILE_FILE}'"
},
"module_gemm_a8w8_asm": {
"srcs": [
Expand Down
Loading
Loading