88 datetime ,
99)
1010from io import StringIO
11+ import warnings
1112
1213from dateutil .parser import parse as du_parse
1314from hypothesis import (
3940from pandas .core .indexes .datetimes import date_range
4041
4142import pandas .io .date_converters as conv
43+ from pandas .io .parsers import read_csv
4244
4345# constant
4446_DEFAULT_DATETIME = datetime (1 , 1 , 1 )
@@ -1556,16 +1558,16 @@ def test_invalid_parse_delimited_date(all_parsers, date_string):
15561558 "date_string,dayfirst,expected" ,
15571559 [
15581560 # %d/%m/%Y; month > 12 thus replacement
1559- ("13/02/2019" , False , datetime (2019 , 2 , 13 )),
15601561 ("13/02/2019" , True , datetime (2019 , 2 , 13 )),
15611562 # %m/%d/%Y; day > 12 thus there will be no replacement
15621563 ("02/13/2019" , False , datetime (2019 , 2 , 13 )),
1563- ("02/13/2019" , True , datetime (2019 , 2 , 13 )),
15641564 # %d/%m/%Y; dayfirst==True thus replacement
15651565 ("04/02/2019" , True , datetime (2019 , 2 , 4 )),
15661566 ],
15671567)
1568- def test_parse_delimited_date_swap (all_parsers , date_string , dayfirst , expected ):
1568+ def test_parse_delimited_date_swap_no_warning (
1569+ all_parsers , date_string , dayfirst , expected
1570+ ):
15691571 parser = all_parsers
15701572 expected = DataFrame ({0 : [expected ]}, dtype = "datetime64[ns]" )
15711573 result = parser .read_csv (
@@ -1574,6 +1576,30 @@ def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected)
15741576 tm .assert_frame_equal (result , expected )
15751577
15761578
@pytest.mark.parametrize(
    "date_string,dayfirst,expected",
    [
        # Leading field is 13, which cannot be a month: the expected value is
        # the day-first reading even though dayfirst=False is passed.
        ("13/02/2019", False, datetime(2019, 2, 13)),
        # Second field is 13, which cannot be a month: the expected value is
        # the month-first reading even though dayfirst=True is passed.
        ("02/13/2019", True, datetime(2019, 2, 13)),
    ],
)
def test_parse_delimited_date_swap_with_warning(
    all_parsers, date_string, dayfirst, expected
):
    """
    Dates whose only valid reading contradicts ``dayfirst`` must still parse
    to ``expected`` while ``read_csv`` emits the "consistent parsing"
    ``UserWarning``.
    """
    warning_msg = (
        "Provide format or specify infer_datetime_format=True for consistent parsing"
    )
    wanted = DataFrame({0: [expected]}, dtype="datetime64[ns]")

    # Only the read itself is expected to warn; the comparison stays outside.
    with tm.assert_produces_warning(UserWarning, match=warning_msg):
        parsed = all_parsers.read_csv(
            StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0]
        )
    tm.assert_frame_equal(parsed, wanted)
1601+
1602+
15771603def _helper_hypothesis_delimited_date (call , date_string , ** kwargs ):
15781604 msg , result = None , None
15791605 try :
@@ -1602,9 +1628,11 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti
16021628 except_in_dateutil , except_out_dateutil = None , None
16031629 date_string = test_datetime .strftime (date_format .replace (" " , delimiter ))
16041630
1605- except_out_dateutil , result = _helper_hypothesis_delimited_date (
1606- parse_datetime_string , date_string , dayfirst = dayfirst
1607- )
1631+ with warnings .catch_warnings ():
1632+ warnings .filterwarnings ("ignore" , category = UserWarning )
1633+ except_out_dateutil , result = _helper_hypothesis_delimited_date (
1634+ parse_datetime_string , date_string , dayfirst = dayfirst
1635+ )
16081636 except_in_dateutil , expected = _helper_hypothesis_delimited_date (
16091637 du_parse ,
16101638 date_string ,
@@ -1674,3 +1702,95 @@ def test_date_parser_usecols_thousands(all_parsers):
16741702 )
16751703 expected = DataFrame ({"B" : [3 , 4 ], "C" : [Timestamp ("20-09-2001 01:00:00" )] * 2 })
16761704 tm .assert_frame_equal (result , expected )
1705+
1706+
def test_dayfirst_warnings():
    """
    Check the ``UserWarning`` emitted by ``read_csv`` when a delimited date is
    parsed in an order that disagrees with the ``dayfirst`` setting.

    CASE 1 uses a column that can be read as DD/MM/YYYY throughout.
    CASE 2 mixes a DD/MM/YYYY row with a MM/DD/YYYY row, so every call is
    expected to warn about one of the two rows.
    """
    # GH 12585
    warning_msg_day_first = (
        "Parsing '31/12/2014' in DD/MM/YYYY format. Provide "
        "format or specify infer_datetime_format=True for consistent parsing."
    )
    warning_msg_month_first = (
        "Parsing '03/30/2011' in MM/DD/YYYY format. Provide "
        "format or specify infer_datetime_format=True for consistent parsing."
    )

    def read_date_index(csv_text, **kwargs):
        # Parse the single "date" column and return it as the frame's index.
        return read_csv(
            StringIO(csv_text), parse_dates=["date"], index_col="date", **kwargs
        ).index

    # CASE 1: valid input
    consistent_csv = "date\n31/12/2014\n10/03/2011"
    expected_consistent = DatetimeIndex(
        ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date"
    )
    expected_inconsistent = DatetimeIndex(
        ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date"
    )

    # A. dayfirst arg correct, no warning expected (absence is not asserted)
    res1 = read_date_index(consistent_csv, dayfirst=True)
    tm.assert_index_equal(expected_consistent, res1)

    # B. dayfirst arg incorrect, warning + incorrect output
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res2 = read_date_index(consistent_csv, dayfirst=False)
    tm.assert_index_equal(expected_inconsistent, res2)

    # C. dayfirst left at its default, same outcome as B.
    # The argument is deliberately omitted so the default path is exercised.
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res3 = read_date_index(consistent_csv)
    tm.assert_index_equal(expected_inconsistent, res3)

    # D. infer_datetime_format=True overrides dayfirst default
    # no warning expected (absence is not asserted) + correct result
    res4 = read_date_index(consistent_csv, infer_datetime_format=True)
    tm.assert_index_equal(expected_consistent, res4)

    # CASE 2: invalid input
    # cannot consistently process with single format
    # warnings *always* raised

    # first in DD/MM/YYYY, second in MM/DD/YYYY
    mixed_csv = "date\n31/12/2014\n03/30/2011"
    expected = DatetimeIndex(
        ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date"
    )

    # A. use dayfirst=True
    with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
        res5 = read_date_index(mixed_csv, dayfirst=True)
    tm.assert_index_equal(expected, res5)

    # B. use dayfirst=False
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res6 = read_date_index(mixed_csv, dayfirst=False)
    tm.assert_index_equal(expected, res6)

    # C. dayfirst left at its default, same outcome as B.
    # The argument is deliberately omitted so the default path is exercised.
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res7 = read_date_index(mixed_csv)
    tm.assert_index_equal(expected, res7)

    # D. use infer_datetime_format=True
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res8 = read_date_index(mixed_csv, infer_datetime_format=True)
    tm.assert_index_equal(expected, res8)
0 commit comments