11import io
22import tarfile
33import zipfile
4+ from unittest .mock import patch
45
56import pandas as pd
67import pytest
78
8- from janitor .io import read_archive
9+ from janitor .io import (
10+ _infer_file_type ,
11+ read_archive ,
12+ )
913
1014
# Fixtures for creating test archives
@pytest.fixture
def dummy_zip_file(tmp_path):
    """Create a dummy ZIP file containing two CSV files.

    Args:
        tmp_path: Directory in which the archive is written.

    Returns:
        The path of the written archive, ``tmp_path / "dummy.zip"``.
    """
    zip_path = tmp_path / "dummy.zip"
    with zipfile.ZipFile(zip_path, mode="w") as zf:
        # Each member is a header row followed by two data rows.
        zf.writestr("file1.csv", "col1,col2\n1,2\n3,4")
        zf.writestr("file2.csv", "col3,col4\n5,6\n7,8")
    return zip_path
1924
2025
2126@pytest .fixture
22- def tar_test_file (tmp_path ):
23- """Fixture pour créer un fichier TAR de test ."""
24- tar_path = tmp_path / "test .tar.gz"
27+ def dummy_tar_file (tmp_path ):
28+ """Create a dummy TAR file containing two CSV files ."""
29+ tar_path = tmp_path / "dummy .tar.gz"
2530 with tarfile .open (tar_path , mode = "w:gz" ) as tf :
2631 info1 = tarfile .TarInfo (name = "file1.csv" )
2732 data1 = io .BytesIO (b"col1,col2\n 1,2\n 3,4" )
@@ -35,23 +40,27 @@ def tar_test_file(tmp_path):
3540 return tar_path
3641
3742
# Tests for reading archives via `read_archive`
def test_read_zip_archive(dummy_zip_file):
    """Test loading one selected CSV member of a ZIP archive."""
    archive_path = str(dummy_zip_file)
    frame = read_archive(
        archive_path, extract_to_df=True, selected_files=["file1.csv"]
    )
    assert isinstance(frame, pd.DataFrame)
    # Only file1.csv was selected, so its header and two rows are expected.
    assert list(frame.columns) == ["col1", "col2"]
    assert frame.shape == (2, 2)
4552
4653
def test_list_files_in_zip(dummy_zip_file):
    """Test that listing mode returns the CSV member names of a ZIP."""
    listing = read_archive(str(dummy_zip_file), extract_to_df=False)
    assert isinstance(listing, list)
    for member in ("file1.csv", "file2.csv"):
        assert member in listing
5260
5361
54- def test_no_compatible_files (tmp_path ):
62+ def test_no_compatible_files_in_zip (tmp_path ):
63+ """Test handling a ZIP archive with no compatible files."""
5564 zip_path = tmp_path / "empty.zip"
5665 with zipfile .ZipFile (zip_path , mode = "w" ) as zf :
5766 zf .writestr ("file1.txt" , "Just some text" )
@@ -61,17 +70,82 @@ def test_no_compatible_files(tmp_path):
6170 read_archive (str (zip_path ))
6271
6372
def test_read_tar_archive(dummy_tar_file):
    """Test loading one selected CSV member of a TAR archive."""
    archive_path = str(dummy_tar_file)
    frame = read_archive(
        archive_path, extract_to_df=True, selected_files=["file1.csv"]
    )
    assert isinstance(frame, pd.DataFrame)
    # Only file1.csv was selected, so its header and two rows are expected.
    assert list(frame.columns) == ["col1", "col2"]
    assert frame.shape == (2, 2)
7181
7282
def test_list_files_in_tar(dummy_tar_file):
    """Test that listing mode returns the CSV member names of a TAR."""
    listing = read_archive(str(dummy_tar_file), extract_to_df=False)
    assert isinstance(listing, list)
    for member in ("file1.csv", "file2.csv"):
        assert member in listing
89+
90+
def test_no_compatible_files_in_tar(tmp_path):
    """Test that a TAR archive holding only a .txt member raises."""
    payload = b"Just some text"
    member = tarfile.TarInfo(name="file1.txt")
    # The header must carry the payload size for the member to be written.
    member.size = len(payload)
    archive_path = tmp_path / "invalid.tar.gz"
    with tarfile.open(archive_path, mode="w:gz") as archive:
        archive.addfile(member, io.BytesIO(payload))
    expected_message = "No compatible files found in the archive"
    with pytest.raises(ValueError, match=expected_message):
        read_archive(str(archive_path))
103+
104+
# Tests for unsupported file types
def test_read_archive_unsupported_file():
    """Test handling unsupported file types.

    Calling ``read_archive`` on a path with an unrecognised extension and no
    explicit file type must raise ``ValueError`` with the inference message.
    """
    # ``match`` is applied with ``re.search``; the periods are escaped so
    # they only match a literal "." rather than any character.
    with pytest.raises(
        ValueError,
        match=r"Cannot infer file type from the file extension\. "
        r"Please specify the 'file_type' parameter\.",
    ):
        read_archive("test.unsupported")
114+
115+
def test_read_archive_no_extension():
    """Test handling files with no extension.

    Calling ``read_archive`` on a path without any extension and no explicit
    file type must raise ``ValueError`` with the inference message.
    """
    # ``match`` is applied with ``re.search``; the periods are escaped so
    # they only match a literal "." rather than any character.
    with pytest.raises(
        ValueError,
        match=r"Cannot infer file type from the file extension\. "
        r"Please specify the 'file_type' parameter\.",
    ):
        read_archive("testfile")
124+
125+
# Tests for interactive file selection
def test_interactive_file_selection_valid(dummy_zip_file):
    """Test listing ZIP members while ``input`` is stubbed to "1,2"."""
    # NOTE(review): whether ``read_archive`` consults ``input`` when
    # ``extract_to_df=False`` is not visible here -- confirm that the
    # interactive prompt path is actually exercised by this test.
    with patch("builtins.input", return_value="1,2"):
        listing = read_archive(str(dummy_zip_file), extract_to_df=False)
    for member in ("file1.csv", "file2.csv"):
        assert member in listing
134+
135+
# Tests for file type inference
def test_infer_file_type_valid():
    """Test that known archive extensions map to their file type."""
    expected_by_name = {
        "test.zip": "zip",
        "test.tar": "tar",
        "test.tar.gz": "tar.gz",
    }
    for file_name, file_type in expected_by_name.items():
        assert _infer_file_type(file_name) == file_type
142+
143+
def test_infer_file_type_invalid():
    """Test invalid file type inference.

    A name without a recognised archive extension must make
    ``_infer_file_type`` raise ``ValueError`` with the inference message.
    """
    # ``match`` is applied with ``re.search``; the periods are escaped so
    # they only match a literal "." rather than any character.
    with pytest.raises(
        ValueError,
        match=r"Cannot infer file type from the file extension\. "
        r"Please specify the 'file_type' parameter\.",
    ):
        _infer_file_type("testfile")
0 commit comments