22from pathlib import Path
33
44from langchain_core .documents import Document
5- from langchain_community .document_loaders import CSVLoader
65
7- EXAMPLE_DOCS_DIRECTORY = str (Path (__file__ ).parent .parent / "examples/" )
6+ from langchain_community .document_loaders import CSVLoader , PyPDFLoader
7+
# Fixture directory: the "examples" folder three directory levels above this file.
EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent.parent.joinpath("examples/"))
9+
810
def test_pebblo_import() -> None:
    """Check that PebbloSafeLoader is importable from document_loaders."""
    # The import is the whole test: an ImportError here fails it.
    from langchain_community.document_loaders import PebbloSafeLoader  # noqa: F401
1214
15+
def test_empty_filebased_loader() -> None:
    """Check that loading `test_empty.csv` through the safe loader gives no docs."""
    from langchain_community.document_loaders import PebbloSafeLoader

    # Arrange: wrap a CSV loader that points at the empty fixture file.
    empty_csv = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_empty.csv")
    csv_loader = CSVLoader(file_path=empty_csv)
    safe_loader = PebbloSafeLoader(
        csv_loader,
        "dummy_app_name",
        "dummy_owner",
        "dummy_description",
    )

    # Act
    loaded = safe_loader.load()

    # Assert: the test expects an empty document list.
    assert loaded == []
2935
36+
3037def test_csv_loader_load_valid_data () -> None :
3138 # Setup
3239 from langchain_community .document_loaders import PebbloSafeLoader
40+
3341 file_path = os .path .join (EXAMPLE_DOCS_DIRECTORY , "test_nominal.csv" )
3442 expected_docs = [
3543 Document (
@@ -45,37 +53,33 @@ def test_csv_loader_load_valid_data() -> None:
4553 # Exercise
4654 loader = PebbloSafeLoader (
4755 CSVLoader (file_path = file_path ),
48- "dummy_app_name" , "dummy_owner" ,"dummy_description"
49- )
56+ "dummy_app_name" ,
57+ "dummy_owner" ,
58+ "dummy_description" ,
59+ )
5060 result = loader .load ()
5161
5262 # Assert
5363 assert result == expected_docs
5464
def test_pdf_lazy_load() -> None:
    """Test lazy loading of a PDF fixture through the Pebblo safe loader.

    Wraps a `PyPDFLoader` for `multi-page-forms-sample-2-page.pdf` in a
    `PebbloSafeLoader`, drains `lazy_load()`, and checks that exactly two
    documents come back (presumably one per page of the fixture -- confirm
    against the PDF if the count ever changes).
    """
    # Setup
    from langchain_community.document_loaders import PebbloSafeLoader

    file_path = os.path.join(
        EXAMPLE_DOCS_DIRECTORY, "multi-page-forms-sample-2-page.pdf"
    )

    # Exercise
    loader = PebbloSafeLoader(
        PyPDFLoader(file_path=file_path),
        "dummy_app_name",
        "dummy_owner",
        "dummy_description",
    )

    # Materialize the lazy iterator so the yielded documents can be counted.
    result = list(loader.lazy_load())

    # Assert
    assert len(result) == 2
0 commit comments