You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I changed the code slightly to point it to a directory with PDF files:
loader = DirectoryLoader("Q:/", recursive=True)
And I keep getting the following errors.
I have tried:
pip3 install pdf2image pdfminer.six
Somewhere else advised:
pip install unstructured==0.7.12
However, I then got "pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information."
So I ran pip install tesseract.
And now I end up back with the Poppler error:
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdf2image\pdf2image.py", line 568, in pdfinfo_from_path
proc = Popen(command, env=env, stdout=PIPE, stderr=PIPE)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1026, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1538, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [WinError 2] The system cannot find the file specified
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\User\Downloads\chatgpt-retrieval-main\chatgpt-retrieval-main\chatgpt.py", line 37, in <module>
index = VectorstoreIndexCreator(vectorstore_kwargs={"persist_directory":"persist"}).from_loaders([loader])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\indexes\vectorstore.py", line 81, in from_loaders
docs.extend(loader.load())
^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\directory.py", line 156, in load
self.load_file(i, p, docs, pbar)
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\directory.py", line 105, in load_file
raise e
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\directory.py", line 99, in load_file
sub_docs = self.loader_cls(str(item), **self.loader_kwargs).load()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\unstructured.py", line 86, in load
elements = self._get_elements()
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\unstructured.py", line 172, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\auto.py", line 180, in partition
elements = partition_pdf(
^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\documents\elements.py", line 138, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\file_utils\filetype.py", line 519, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\pdf.py", line 83, in partition_pdf
return partition_pdf_or_image(
^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\pdf.py", line 141, in partition_pdf_or_image
return _partition_pdf_or_image_with_ocr(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\utils.py", line 43, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\pdf.py", line 353, in _partition_pdf_or_image_with_ocr
document = pdf2image.convert_from_path(filename)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdf2image\pdf2image.py", line 127, in convert_from_path
page_count = pdfinfo_from_path(
^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdf2image\pdf2image.py", line 594, in pdfinfo_from_path
raise PDFInfoNotInstalledError(
pdf2image.exceptions.PDFInfoNotInstalledError: Unable to get page count. Is poppler installed and in PATH?
The text was updated successfully, but these errors were encountered:
I changed the code slightly to point it to a directory with PDF files:
loader = DirectoryLoader("Q:/", recursive=True)
And I keep getting the following errors.
I have tried:
pip3 install pdf2image pdfminer.six
Somewhere else advised:
pip install unstructured==0.7.12
However, I then got "pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information."
So I ran pip install tesseract.
And now I end up back with the Poppler error:
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdf2image\pdf2image.py", line 568, in pdfinfo_from_path
proc = Popen(command, env=env, stdout=PIPE, stderr=PIPE)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1026, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1538, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [WinError 2] The system cannot find the file specified
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\User\Downloads\chatgpt-retrieval-main\chatgpt-retrieval-main\chatgpt.py", line 37, in <module>
index = VectorstoreIndexCreator(vectorstore_kwargs={"persist_directory":"persist"}).from_loaders([loader])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\indexes\vectorstore.py", line 81, in from_loaders
docs.extend(loader.load())
^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\directory.py", line 156, in load
self.load_file(i, p, docs, pbar)
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\directory.py", line 105, in load_file
raise e
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\directory.py", line 99, in load_file
sub_docs = self.loader_cls(str(item), **self.loader_kwargs).load()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\unstructured.py", line 86, in load
elements = self._get_elements()
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\document_loaders\unstructured.py", line 172, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\auto.py", line 180, in partition
elements = partition_pdf(
^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\documents\elements.py", line 138, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\file_utils\filetype.py", line 519, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\pdf.py", line 83, in partition_pdf
return partition_pdf_or_image(
^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\pdf.py", line 141, in partition_pdf_or_image
return _partition_pdf_or_image_with_ocr(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\utils.py", line 43, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\unstructured\partition\pdf.py", line 353, in _partition_pdf_or_image_with_ocr
document = pdf2image.convert_from_path(filename)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdf2image\pdf2image.py", line 127, in convert_from_path
page_count = pdfinfo_from_path(
^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdf2image\pdf2image.py", line 594, in pdfinfo_from_path
raise PDFInfoNotInstalledError(
pdf2image.exceptions.PDFInfoNotInstalledError: Unable to get page count. Is poppler installed and in PATH?
The text was updated successfully, but these errors were encountered: