-
Notifications
You must be signed in to change notification settings - Fork 4
Add exclusion of zeros in diagnose_matrix #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 14 commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
23a9941
Add exclusion of zeros in diagnose_matrix
yyoean 329c1d4
Update src/deepxtrace/diagnose.py
yyoean dbd190c
Update src/deepxtrace/diagnose.py
yyoean 14e9cd2
Update src/deepxtrace/diagnose.py
yyoean 5ec3877
autopep8
yyoean 1afd8a9
format
yyoean 463843a
format
yyoean dc0f847
essential
yyoean 349b537
format
yyoean ff63728
format
yyoean d5da655
format
yyoean 0e18f72
formart
yyoean 0f261d0
format
yyoean 51be7ff
format
yyoean 88a7f79
update
yyoean cce3028
format
yyoean e64c6ed
simplify
yyoean d33572e
test.py
yyoean 7a456ed
format
yyoean 860c405
format
yyoean ed6ee32
format
yyoean File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,6 +117,7 @@ class Diagnose: | |
| DEEPEP_DIAGNOSE_THRESHOLD_COL: determine threshold for abnormal columns. Default 3.0. | ||
| DEEPEP_DIAGNOSE_THRESHOLD_ROW: determine threshold for abnormal rows. Default 3.0. | ||
| DEEPEP_DIAGNOSE_THRESHOLD_POINT: determine threshold for abnormal individual points. Default 5.0. | ||
| DEEPEP_DIAGNOSE_EXCLUDING_ZEROS: controls whether zero entries are excluded from the statistics in diagnose_matrix (0 = include zeros, non-zero = exclude zeros). Default 0. | ||
|
|
||
| """ | ||
|
|
||
|
|
@@ -161,6 +162,8 @@ def __init__( | |
| os.getenv( | ||
| "DEEPEP_DIAGNOSE_THRESHOLD_POINT", | ||
| 5.0)) | ||
| self.excluding_zeros = int( | ||
| os.getenv("DEEPEP_DIAGNOSE_EXCLUDING_ZEROS", 0)) | ||
|
|
||
| # Initialize the diagnose | ||
| self.group = group | ||
|
|
@@ -306,7 +309,7 @@ def _setup_logger_internal( | |
| @staticmethod | ||
| def diagnose_matrix( | ||
| mat, thres_col=3.0, thres_row=3.0, thres_point=5.0, | ||
| suppress_points_in_strong_rowscols=True | ||
| suppress_points_in_strong_rowscols=True, excluing_zeros=0 | ||
| ): | ||
| """ | ||
| Detect abnormal columns, rows, and individual points in a 2D wait-time matrix. | ||
|
|
@@ -325,43 +328,107 @@ def diagnose_matrix( | |
| "abnormal_points": List[List[int, int, float, float]] # abnormal points, [row, col, value, normalized_value] | ||
| } | ||
| """ | ||
| # 1. Check for abnormal columns | ||
| col_means = mat.mean(axis=0) | ||
| # z_col = (col_means - col_means.mean()) / (col_means.std() + 1e-8) | ||
| z_col = col_means / (col_means.mean() + 1e-8) | ||
| abnormal_cols = [ | ||
| [j, col_means[j], z_col[j]] | ||
| for j in np.where(z_col > thres_col)[0] | ||
| ] | ||
|
|
||
| # 2. Check for abnormal rows | ||
| row_means = mat.mean(axis=1) | ||
| # z_row = (row_means - row_means.mean()) / (row_means.std() + 1e-8) | ||
| z_row = row_means / (row_means.mean() + 1e-8) | ||
| abnormal_rows = [ | ||
| [i, row_means[i], z_row[i]] | ||
| for i in np.where(z_row > thres_row)[0] | ||
| ] | ||
|
|
||
| # 3. Check for abnormal single points | ||
| # z_all = (mat - mat.mean()) / (mat.std() + 1e-8) | ||
| z_all = mat / (mat.mean() + 1e-8) | ||
| # Get all positions with z-score > threshold | ||
| abnormal_points = [ | ||
| [i, j, mat[i, j], z_all[i, j]] | ||
| for i in range(mat.shape[0]) | ||
| for j in range(mat.shape[1]) | ||
| if z_all[i, j] > thres_point | ||
| ] | ||
| # Optionally remove points that are in already detected abnormal rows | ||
| # or columns | ||
| if suppress_points_in_strong_rowscols: | ||
| strong_rows = [row[0] for row in abnormal_rows] | ||
| strong_cols = [col[0] for col in abnormal_cols] | ||
| if excluing_zeros == 0: | ||
| # 1. Check for abnormal columns (including zeros) | ||
| col_means = mat.mean(axis=0) | ||
| # z_col = (col_means - col_means.mean()) / (col_means.std() + 1e-8) | ||
| z_col = col_means / (col_means.mean() + 1e-8) | ||
| abnormal_cols = [ | ||
| [j, col_means[j], z_col[j]] | ||
| for j in np.where(z_col > thres_col)[0] | ||
| ] | ||
|
|
||
| # 2. Check for abnormal rows (including zeros) | ||
| row_means = mat.mean(axis=1) | ||
| # z_row = (row_means - row_means.mean()) / (row_means.std() + 1e-8) | ||
| z_row = row_means / (row_means.mean() + 1e-8) | ||
| abnormal_rows = [ | ||
| [i, row_means[i], z_row[i]] | ||
| for i in np.where(z_row > thres_row)[0] | ||
| ] | ||
|
|
||
| # 3. Check for abnormal single points (including zeros) | ||
| # z_all = (mat - mat.mean()) / (mat.std() + 1e-8) | ||
| z_all = mat / (mat.mean() + 1e-8) | ||
| # Get all positions with z-score > threshold | ||
| abnormal_points = [ | ||
| [i, j, mat[i, j], z_all[i, j]] | ||
| for i in range(mat.shape[0]) | ||
| for j in range(mat.shape[1]) | ||
| if z_all[i, j] > thres_point | ||
| ] | ||
| # Optionally remove points that are in already detected abnormal | ||
| # rows or columns | ||
| if suppress_points_in_strong_rowscols: | ||
| strong_rows = [row[0] for row in abnormal_rows] | ||
| strong_cols = [col[0] for col in abnormal_cols] | ||
| abnormal_points = [ | ||
| [i, j, v, z] for [i, j, v, z] in abnormal_points | ||
| if i not in strong_rows and j not in strong_cols | ||
| ] | ||
| else: | ||
| # 1. Check for abnormal columns (excluding zeros in columns) | ||
|
||
| col_means = np.ma.masked_equal(mat, 0).mean(axis=0).filled(0) | ||
| # Calculate normalized values (exclude all-zero columns) | ||
| # Indices of columns with non-zero mean | ||
| valid_cols = np.where(col_means != 0)[0] | ||
| # Initialize all-zero array | ||
| z_col = np.zeros_like(col_means) | ||
| if len(valid_cols) > 0: | ||
| z_col[valid_cols] = col_means[valid_cols] / \ | ||
| (col_means[valid_cols].mean() + 1e-8) | ||
| # Detect abnormal columns (only non-zero columns) | ||
| abnormal_cols = [ | ||
| [j, col_means[j], z_col[j]] | ||
| for j in valid_cols | ||
| if z_col[j] > thres_col | ||
| ] | ||
|
|
||
| # 2. Check for abnormal rows (excluding zeros in rows) | ||
| row_means = np.ma.masked_equal(mat, 0).mean(axis=1).filled(0) | ||
| # Calculate normalized values (exclude all-zero rows) | ||
| # Indices of rows with non-zero mean | ||
| valid_rows = np.where(row_means != 0)[0] | ||
| # Initialize all-zero array | ||
| z_row = np.zeros_like(row_means) | ||
| if len(valid_rows) > 0: | ||
| z_row[valid_rows] = row_means[valid_rows] / \ | ||
| (row_means[valid_rows].mean() + 1e-8) | ||
| # Detect abnormal rows (only non-zero rows) | ||
| abnormal_rows = [ | ||
| [i, row_means[i], z_row[i]] | ||
| for i in valid_rows | ||
| if z_row[i] > thres_row | ||
| ] | ||
|
|
||
| # 3. Check for abnormal single points (excluding zeros) | ||
|
||
| mask = mat != 0 # Create mask for non-zero values | ||
| # Initialize all-zero array | ||
| z_all = np.zeros_like(mat, dtype=float) | ||
| if np.any(mask): # If non-zero values exist | ||
| # Calculate mean of non-zero values (global) | ||
| nonzero_mean = mat[mask].mean() | ||
| # Normalize only non-zero values | ||
| z_all[mask] = mat[mask] / (nonzero_mean + 1e-8) | ||
| # Detect abnormal points (non-zero values with z-score > threshold) | ||
| abnormal_points = [ | ||
| [i, j, v, z] for [i, j, v, z] in abnormal_points | ||
| if i not in strong_rows and j not in strong_cols | ||
| [i, j, mat[i, j], z_all[i, j]] | ||
| for i in range(mat.shape[0]) | ||
| for j in range(mat.shape[1]) | ||
| # Ensure non-zero and abnormal | ||
| if mask[i, j] and z_all[i, j] > thres_point | ||
| ] | ||
| # Optionally remove points in already detected abnormal | ||
| # rows/columns | ||
| if suppress_points_in_strong_rowscols: | ||
| # Use set for faster lookup | ||
| strong_rows = {row[0] for row in abnormal_rows} | ||
| strong_cols = {col[0] for col in abnormal_cols} | ||
| abnormal_points = [ | ||
| [i, j, v, z] for [i, j, v, z] in abnormal_points | ||
| if i not in strong_rows and j not in strong_cols | ||
| ] | ||
|
|
||
| # 4. Return for automatic processing | ||
| return { | ||
| "abnormal_cols": abnormal_cols, | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.