chardet
filetype
python-magic
lxml
nltk
tabulate
requests
beautifulsoup4
emoji
dataclasses-json
python-iso639
langdetect
numpy
rapidfuzz
backoff
typing-extensions
unstructured-client
wrapt

[airtable]
pyairtable

[all-docs]
unstructured.pytesseract>=0.3.12
python-pptx<=0.6.23
xlrd
pdf2image
markdown
pandas
pypandoc
pikepdf
pypdf
unstructured-inference==0.7.31
openpyxl
onnx
msg_parser
google-cloud-vision
pdfminer.six
pillow_heif
networkx
python-docx

[astra]
astrapy

[azure]
adlfs
fsspec

[azure-cognitive-search]
azure-search-documents

[bedrock]
boto3
langchain-community

[biomed]
bs4

[box]
boxfs
fsspec

[chroma]
chromadb
importlib-metadata>=7.1.0
typer<=0.9.0

[clarifai]
clarifai

[confluence]
atlassian-python-api

[csv]
pandas

[databricks-volumes]
databricks-sdk

[delta-table]
deltalake
fsspec

[discord]
discord-py

[doc]
python-docx

[docx]
python-docx

[dropbox]
dropboxdrivefs
fsspec

[elasticsearch]
elasticsearch

[embed-huggingface]
huggingface
langchain-community
sentence_transformers

[embed-octoai]
openai
tiktoken

[embed-vertexai]
langchain
langchain-community
langchain-google-vertexai

[epub]
pypandoc

[gcs]
gcsfs
fsspec
bs4

[github]
pygithub>1.58.0

[gitlab]
python-gitlab

[google-drive]
google-api-python-client

[hubspot]
hubspot-api-client
urllib3

[huggingface]
langdetect
sacremoses
sentencepiece
torch
transformers

[image]
onnx
pdf2image
pdfminer.six
pikepdf
pillow_heif
pypdf
unstructured-inference==0.7.31
unstructured.pytesseract>=0.3.12
google-cloud-vision

[jira]
atlassian-python-api

[local-inference]
unstructured.pytesseract>=0.3.12
python-pptx<=0.6.23
xlrd
pdf2image
markdown
pandas
pypandoc
pikepdf
pypdf
unstructured-inference==0.7.31
openpyxl
onnx
msg_parser
google-cloud-vision
pdfminer.six
pillow_heif
networkx
python-docx

[md]
markdown

[mongodb]
pymongo

[msg]
msg_parser

[notion]
notion-client
htmlBuilder

[odt]
python-docx
pypandoc

[onedrive]
msal
Office365-REST-Python-Client
bs4

[openai]
langchain-community
tiktoken
openai

[opensearch]
opensearch-py

[org]
pypandoc

[outlook]
msal
Office365-REST-Python-Client

[paddleocr]
unstructured.paddleocr==2.6.1.3

[pdf]
onnx
pdf2image
pdfminer.six
pikepdf
pillow_heif
pypdf
unstructured-inference==0.7.31
unstructured.pytesseract>=0.3.12
google-cloud-vision

[pinecone]
pinecone-client>=3.7.1

[postgres]
psycopg2-binary

[ppt]
python-pptx<=0.6.23

[pptx]
python-pptx<=0.6.23

[qdrant]
qdrant-client

[reddit]
praw

[rst]
pypandoc

[rtf]
pypandoc

[s3]
s3fs
fsspec

[salesforce]
simple-salesforce

[sftp]
fsspec
paramiko

[sharepoint]
msal
Office365-REST-Python-Client

[slack]
slack_sdk

[tsv]
pandas

[weaviate]
weaviate-client

[wikipedia]
wikipedia

[xlsx]
openpyxl
pandas
xlrd
networkx
