Skip to content

Commit f083977

Browse files
authored
Merge pull request #5386 from codeflash-ai/codeflash/optimize-validate_gantt-mhcxyu68
⚡️ Speed up function `validate_gantt` by 58x
2 parents 043ff90 + 79fe9f4 commit f083977

File tree

3 files changed

+102
-3
lines changed

3 files changed

+102
-3
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ This project adheres to [Semantic Versioning](http://semver.org/).
44

55
## Unreleased
66

7+
### Updated
8+
- Speed up `validate_gantt` function [[#5386](https://github.com/plotly/plotly.py/pull/5386)], with thanks to @misrasaurabh1 for the contribution!
9+
710
## [6.5.0] - 2025-11-17
811

912
### Updated

plotly/figure_factory/_gantt.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,13 @@ def validate_gantt(df):
3939
"following keys: {0}".format(", ".join(REQUIRED_GANTT_KEYS))
4040
)
4141

42+
columns = {key: df[key].values for key in df}
4243
num_of_rows = len(df.index)
4344
chart = []
45+
# Using only keys present in the DataFrame columns
46+
keys = list(df.columns)
4447
for index in range(num_of_rows):
45-
task_dict = {}
46-
for key in df:
47-
task_dict[key] = df.iloc[index][key]
48+
task_dict = {key: columns[key][index] for key in keys}
4849
chart.append(task_dict)
4950

5051
return chart
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import pytest
2+
3+
from plotly import exceptions, optional_imports
4+
from plotly.figure_factory._gantt import validate_gantt
5+
6+
pd = optional_imports.get_module("pandas")
7+
8+
9+
@pytest.mark.parametrize("input_type", ["list", "dataframe"])
def test_valid_with_extra_keys(input_type):
    """Check that columns beyond the required ones survive validation.

    Runs once with a plain list of dicts and once with a DataFrame built
    from the same rows.
    """
    rows = [
        {"Task": "A", "Start": "2020-01-01", "Finish": "2020-01-02", "Resource": "X"},
        {"Task": "B", "Start": "2020-01-03", "Finish": "2020-01-04", "Resource": "Y"},
    ]
    as_frame = input_type == "dataframe"
    input_data = pd.DataFrame(rows) if as_frame else rows
    result = validate_gantt(input_data)

    if as_frame:
        # DataFrame input is expected to come back as a list of row dicts
        # carrying every column of the frame.
        assert isinstance(result, list)
        assert set(result[0]) == set(input_data.columns)
    else:
        # List input is expected to be returned as the very same object.
        assert result is input_data

    # Expectations shared by both input flavours.
    assert len(result) == 2
    for row in result:
        assert "Resource" in row
    assert set(result[0]) == {"Task", "Start", "Finish", "Resource"}
    assert result[0]["Task"] == "A"
    assert result[1]["Finish"] == "2020-01-04"
31+
32+
33+
def test_missing_required_key_in_dataframe():
    """A DataFrame without a "Finish" column is expected to raise PlotlyError."""
    # Single row that carries "Task" and "Start" but no "Finish".
    incomplete_row = {"Task": "A", "Start": "2020-01-01"}
    incomplete_frame = pd.DataFrame([incomplete_row])
    with pytest.raises(exceptions.PlotlyError):
        validate_gantt(incomplete_frame)
41+
42+
43+
def test_empty_list():
    """An empty list is expected to be rejected with PlotlyError."""
    no_tasks = []
    with pytest.raises(exceptions.PlotlyError):
        validate_gantt(no_tasks)
46+
47+
48+
def test_input_is_not_list_or_dataframe():
    """A plain string input is expected to raise PlotlyError."""
    unsupported_input = "Not a list or DataFrame"
    with pytest.raises(exceptions.PlotlyError):
        validate_gantt(unsupported_input)
51+
52+
53+
def test_dataframe_with_no_rows():
    """A DataFrame with the required columns but zero rows should give []."""
    required_columns = ["Task", "Start", "Finish"]
    empty_frame = pd.DataFrame(columns=required_columns)

    chart = validate_gantt(empty_frame)

    assert isinstance(chart, list)
    assert chart == []
58+
59+
60+
def test_list_with_dict_missing_all_keys():
    """A list whose only dict lacks every required key is returned as-is.

    No exception is expected here: this test pins that list input is not
    checked for the required keys.
    """
    input_data = [{"Resource": "X"}]
    returned = validate_gantt(input_data)
    # Identity, not just equality: the same list object must come back.
    assert returned is input_data
65+
66+
67+
def test_large_list_with_non_dict_first_element():
    """A long list that starts with a non-dict is expected to raise PlotlyError."""
    # 999 well-formed task dicts; only the leading element is malformed.
    generated_tasks = [
        {
            "Task": f"Task{i}",
            "Start": f"2020-01-{i % 30 + 1:02d}",
            "Finish": f"2020-02-{i % 28 + 1:02d}",
        }
        for i in range(999)
    ]
    input_data = ["Not a dict", *generated_tasks]

    with pytest.raises(exceptions.PlotlyError):
        validate_gantt(input_data)
81+
82+
83+
def test_dataframe_column_order_and_index():
    """Values must be preserved with shuffled columns and a non-default index."""
    records = [
        {"Finish": "2023-01-02", "Start": "2023-01-01", "Task": "A"},
        {"Finish": "2023-01-03", "Start": "2023-01-02", "Task": "B"},
    ]
    # String labels instead of the default 0..n-1 positional index.
    frame = pd.DataFrame(records, index=["x", "y"])

    chart = validate_gantt(frame)

    assert len(chart) == 2
    # The first row must still map to task "A" despite column order and index.
    first_row = chart[0]
    assert first_row["Task"] == "A"
    assert set(first_row) == {"Task", "Start", "Finish"}

0 commit comments

Comments
 (0)