11import  io 
22import  tarfile 
33import  zipfile 
4+ from  unittest .mock  import  patch 
45
56import  pandas  as  pd 
67import  pytest 
78
8- from  janitor .io  import  read_archive 
9+ from  janitor .io  import  (
10+     _infer_file_type ,
11+     read_archive ,
12+ )
913
1014
# Fixtures for creating test archives
@pytest.fixture
def dummy_zip_file(tmp_path):
    """Create a dummy ZIP archive containing two CSV files.

    Args:
        tmp_path: Directory in which the archive is written.

    Returns:
        Path of the created ``dummy.zip``, which holds ``file1.csv``
        (columns ``col1``/``col2``) and ``file2.csv`` (columns
        ``col3``/``col4``), each with two data rows.
    """
    zip_path = tmp_path / "dummy.zip"
    with zipfile.ZipFile(zip_path, mode="w") as zf:
        zf.writestr("file1.csv", "col1,col2\n1,2\n3,4")
        zf.writestr("file2.csv", "col3,col4\n5,6\n7,8")
    return zip_path
1924
2025
2126@pytest .fixture  
22- def  tar_test_file (tmp_path ):
23-     """Fixture pour créer un fichier TAR de test .""" 
24-     tar_path  =  tmp_path  /  "test .tar.gz" 
27+ def  dummy_tar_file (tmp_path ):
28+     """Create a dummy TAR file containing two CSV files .""" 
29+     tar_path  =  tmp_path  /  "dummy .tar.gz" 
2530    with  tarfile .open (tar_path , mode = "w:gz" ) as  tf :
2631        info1  =  tarfile .TarInfo (name = "file1.csv" )
2732        data1  =  io .BytesIO (b"col1,col2\n 1,2\n 3,4" )
@@ -35,23 +40,27 @@ def tar_test_file(tmp_path):
3540    return  tar_path 
3641
3742
# Tests for reading archives via `read_archive`
def test_read_zip_archive(dummy_zip_file):
    """Test reading a specific file from a ZIP archive.

    Only ``file1.csv`` is selected, so the result must be a single
    DataFrame with that file's two columns and two rows.
    """
    result = read_archive(
        str(dummy_zip_file),
        extract_to_df=True,
        selected_files=["file1.csv"],
    )
    assert isinstance(result, pd.DataFrame)
    assert list(result.columns) == ["col1", "col2"]
    assert result.shape == (2, 2)
4552
4653
def test_list_files_in_zip(dummy_zip_file):
    """Test listing files in a ZIP archive.

    With ``extract_to_df=False`` the call is expected to return a list
    that names both CSV members of the fixture archive.
    """
    result = read_archive(str(dummy_zip_file), extract_to_df=False)
    assert isinstance(result, list)
    assert "file1.csv" in result
    assert "file2.csv" in result
5260
5361
54- def  test_no_compatible_files (tmp_path ):
62+ def  test_no_compatible_files_in_zip (tmp_path ):
63+     """Test handling a ZIP archive with no compatible files.""" 
5564    zip_path  =  tmp_path  /  "empty.zip" 
5665    with  zipfile .ZipFile (zip_path , mode = "w" ) as  zf :
5766        zf .writestr ("file1.txt" , "Just some text" )
@@ -61,17 +70,82 @@ def test_no_compatible_files(tmp_path):
6170        read_archive (str (zip_path ))
6271
6372
def test_read_tar_archive(dummy_tar_file):
    """Test reading a specific file from a TAR archive.

    Only ``file1.csv`` is selected, so the result must be a single
    DataFrame with that file's two columns and two rows.
    """
    result = read_archive(
        str(dummy_tar_file),
        extract_to_df=True,
        selected_files=["file1.csv"],
    )
    assert isinstance(result, pd.DataFrame)
    assert list(result.columns) == ["col1", "col2"]
    assert result.shape == (2, 2)
7181
7282
def test_list_files_in_tar(dummy_tar_file):
    """Test listing files in a TAR archive.

    With ``extract_to_df=False`` the call is expected to return a list
    that names both CSV members of the fixture archive.
    """
    result = read_archive(str(dummy_tar_file), extract_to_df=False)
    assert isinstance(result, list)
    assert "file1.csv" in result
    assert "file2.csv" in result
89+ 
90+ 
def test_no_compatible_files_in_tar(tmp_path):
    """Test handling a TAR archive with no compatible files.

    The archive holds a single ``.txt`` member, and ``read_archive`` is
    expected to reject it with a ``ValueError``.
    """
    tar_path = tmp_path / "invalid.tar.gz"
    with tarfile.open(tar_path, mode="w:gz") as tf:
        info = tarfile.TarInfo(name="file1.txt")
        data = io.BytesIO(b"Just some text")
        # TarInfo.size must equal the payload length, otherwise addfile()
        # would store an empty member.
        info.size = data.getbuffer().nbytes
        tf.addfile(info, data)
    with pytest.raises(
        ValueError, match="No compatible files found in the archive"
    ):
        read_archive(str(tar_path))
103+ 
104+ 
# Tests for unsupported file types
def test_read_archive_unsupported_file():
    """Test handling unsupported file types.

    A path with an unrecognised extension and no explicit ``file_type``
    is expected to raise ``ValueError``.
    """
    with pytest.raises(
        ValueError,
        # `match` is applied as a regular expression, so the literal dots
        # are escaped to keep them from matching arbitrary characters.
        match=(
            r"Cannot infer file type from the file extension\. "
            r"Please specify the 'file_type' parameter\."
        ),
    ):
        read_archive("test.unsupported")
114+ 
115+ 
def test_read_archive_no_extension():
    """Test handling files with no extension.

    A path without any extension and no explicit ``file_type`` is
    expected to raise ``ValueError``.
    """
    with pytest.raises(
        ValueError,
        # `match` is applied as a regular expression, so the literal dots
        # are escaped to keep them from matching arbitrary characters.
        match=(
            r"Cannot infer file type from the file extension\. "
            r"Please specify the 'file_type' parameter\."
        ),
    ):
        read_archive("testfile")
124+ 
125+ 
# Tests for interactive file selection
def test_interactive_file_selection_valid(dummy_zip_file):
    """Test valid input for interactive file selection.

    ``builtins.input`` is patched to answer ``"1,2"`` while the archive
    is read, and both CSV members must appear in the result.
    """
    user_input = "1,2"
    # NOTE(review): the call uses extract_to_df=False, the same arguments as
    # the plain listing test; confirm that this path actually consumes the
    # patched input(), otherwise the prompt is never exercised here.
    with patch("builtins.input", return_value=user_input):
        result = read_archive(str(dummy_zip_file), extract_to_df=False)
        assert "file1.csv" in result
        assert "file2.csv" in result
134+ 
135+ 
# Tests for file type inference
def test_infer_file_type_valid():
    """Test valid file type inference.

    Checks the type string returned for ``.zip``, ``.tar`` and
    ``.tar.gz`` file names.
    """
    assert _infer_file_type("test.zip") == "zip"
    assert _infer_file_type("test.tar") == "tar"
    assert _infer_file_type("test.tar.gz") == "tar.gz"
142+ 
143+ 
def test_infer_file_type_invalid():
    """Test invalid file type inference.

    A file name without a recognised extension is expected to raise
    ``ValueError``.
    """
    with pytest.raises(
        ValueError,
        # `match` is applied as a regular expression, so the literal dots
        # are escaped to keep them from matching arbitrary characters.
        match=(
            r"Cannot infer file type from the file extension\. "
            r"Please specify the 'file_type' parameter\."
        ),
    ):
        _infer_file_type("testfile")
0 commit comments