Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,23 @@ jobs:
- name: Free up disk space
run: |
# Clear some space (https://github.com/actions/runner-images/issues/2840)
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost
echo "Disk usage before cleanup:"
df -h

# Remove unnecessary pre-installed software
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /usr/share/swift

# Clean up docker to ensure we start fresh
docker system prune -af --volumes

echo "Disk usage after cleanup:"
df -h
- name: Test Dockerfile
run: |
python${{ env.PYTHON_VERSION }} -m venv .venv
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 0.0.91
* Upgrade packages to resolve CVEs

## 0.0.90
* Upgrade version to pull in latest unstructured version and bump versions of dependencies.

Expand Down
2 changes: 1 addition & 1 deletion prepline_general/api/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.90" # pragma: no cover
__version__ = "0.0.91" # pragma: no cover
2 changes: 1 addition & 1 deletion preprocessing-pipeline-family.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
name: general
version: 0.0.90
version: 0.0.91
86 changes: 42 additions & 44 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,29 @@
#
# pip-compile --no-strip-extras ./requirements/base.in
#
accelerate==1.11.0
accelerate==1.12.0
# via unstructured-inference
aiofiles==25.1.0
# via unstructured-client
annotated-doc==0.0.3
annotated-doc==0.0.4
# via fastapi
annotated-types==0.7.0
# via pydantic
antlr4-python3-runtime==4.9.3
# via omegaconf
anyio==4.11.0
anyio==4.12.0
# via
# httpx
# starlette
backoff==2.2.1
# via
# -r requirements/base.in
# unstructured
beautifulsoup4==4.14.2
beautifulsoup4==4.14.3
# via unstructured
cachetools==6.2.1
cachetools==6.2.4
# via google-auth
certifi==2025.10.5
certifi==2025.11.12
# via
# httpcore
# httpx
Expand All @@ -38,7 +38,7 @@ charset-normalizer==3.4.4
# pdfminer-six
# requests
# unstructured
click==8.3.0
click==8.3.1
# via
# -r requirements/base.in
# nltk
Expand All @@ -65,26 +65,26 @@ emoji==2.15.0
# via unstructured
et-xmlfile==2.0.0
# via openpyxl
fastapi==0.121.0
fastapi==0.128.0
# via -r requirements/base.in
filelock==3.20.0
filelock==3.20.1
# via
# huggingface-hub
# torch
# transformers
filetype==1.2.0
# via unstructured
flatbuffers==25.9.23
flatbuffers==25.12.19
# via onnxruntime
fonttools==4.60.1
fonttools==4.61.1
# via matplotlib
fsspec==2025.10.0
fsspec==2025.12.0
# via
# huggingface-hub
# torch
google-api-core[grpc]==2.28.1
# via google-cloud-vision
google-auth==2.43.0
google-auth==2.45.0
# via
# google-api-core
# google-cloud-vision
Expand Down Expand Up @@ -131,7 +131,7 @@ idna==3.11
# requests
jinja2==3.1.6
# via torch
joblib==1.5.2
joblib==1.5.3
# via nltk
kiwisolver==1.4.9
# via matplotlib
Expand All @@ -147,19 +147,19 @@ markdown==3.10
# via unstructured
markupsafe==3.0.3
# via jinja2
marshmallow==3.26.1
marshmallow==3.26.2
# via dataclasses-json
matplotlib==3.10.7
matplotlib==3.10.8
# via unstructured-inference
ml-dtypes==0.5.3
ml-dtypes==0.5.4
# via onnx
mpmath==1.3.0
# via sympy
msoffcrypto-tool==5.4.2
# via unstructured
mypy-extensions==1.1.0
# via typing-inspect
networkx==3.5
networkx==3.6.1
# via
# torch
# unstructured
Expand Down Expand Up @@ -188,7 +188,7 @@ olefile==0.47
# python-oxmsg
omegaconf==2.3.0
# via effdet
onnx==1.19.1
onnx==1.20.0
# via
# unstructured
# unstructured-inference
Expand Down Expand Up @@ -216,13 +216,13 @@ pandas==2.3.3
# unstructured-inference
pdf2image==1.17.0
# via unstructured
pdfminer-six==20250506
pdfminer-six==20251230
# via
# unstructured
# unstructured-inference
pi-heif==1.1.1
# via unstructured
pikepdf==10.0.0
pikepdf==10.1.0
# via unstructured
pillow==12.0.0
# via
Expand All @@ -233,11 +233,11 @@ pillow==12.0.0
# python-pptx
# torchvision
# unstructured-pytesseract
proto-plus==1.26.1
proto-plus==1.27.0
# via
# google-api-core
# google-cloud-vision
protobuf==6.33.0
protobuf==6.33.2
# via
# google-api-core
# google-cloud-vision
Expand All @@ -246,7 +246,7 @@ protobuf==6.33.0
# onnx
# onnxruntime
# proto-plus
psutil==7.1.3
psutil==7.2.1
# via
# -r requirements/base.in
# accelerate
Expand All @@ -257,40 +257,40 @@ pyasn1==0.6.1
# rsa
pyasn1-modules==0.4.2
# via google-auth
pycocotools==2.0.10
pycocotools==2.0.11
# via effdet
pycparser==2.23
# via cffi
pycryptodome==3.23.0
# via -r requirements/base.in
pydantic==2.12.4
pydantic==2.12.5
# via
# fastapi
# unstructured-client
pydantic-core==2.41.5
# via pydantic
pypandoc==1.15
pypandoc==1.16.2
# via unstructured
pyparsing==3.2.5
pyparsing==3.3.1
# via matplotlib
pypdf==6.1.3
pypdf==6.5.0
# via
# -r requirements/base.in
# unstructured
# unstructured-client
pypdfium2==5.0.0
pypdfium2==5.2.0
# via unstructured-inference
python-dateutil==2.9.0.post0
# via
# matplotlib
# pandas
python-docx==1.2.0
# via unstructured
python-iso639==2025.2.18
python-iso639==2025.11.16
# via unstructured
python-magic==0.4.27
# via unstructured
python-multipart==0.0.20
python-multipart==0.0.21
# via unstructured-inference
python-oxmsg==0.0.2
# via unstructured
Expand Down Expand Up @@ -327,7 +327,7 @@ requests-toolbelt==1.0.0
# via unstructured-client
rsa==4.9.1
# via google-auth
safetensors==0.6.2
safetensors==0.7.0
# via
# accelerate
# timm
Expand All @@ -339,9 +339,7 @@ six==1.17.0
# html5lib
# langdetect
# python-dateutil
sniffio==1.3.1
# via anyio
soupsieve==2.8
soupsieve==2.8.1
# via beautifulsoup4
starlette==0.41.2
# via
Expand All @@ -357,14 +355,14 @@ timm==1.0.22
# unstructured-inference
tokenizers==0.22.1
# via transformers
torch==2.9.0
torch==2.9.1
# via
# accelerate
# effdet
# timm
# torchvision
# unstructured-inference
torchvision==0.24.0
torchvision==0.24.1
# via
# effdet
# timm
Expand All @@ -374,7 +372,7 @@ tqdm==4.67.1
# nltk
# transformers
# unstructured
transformers==4.57.1
transformers==4.57.3
# via unstructured-inference
typing-extensions==4.15.0
# via
Expand All @@ -397,19 +395,19 @@ typing-inspect==0.9.0
# via dataclasses-json
typing-inspection==0.4.2
# via pydantic
tzdata==2025.2
tzdata==2025.3
# via pandas
unstructured[all-docs]==0.18.18
unstructured[all-docs]==0.18.24
# via -r requirements/base.in
unstructured-client==0.42.3
unstructured-client==0.42.6
# via unstructured
unstructured-inference==1.1.1
# via unstructured
unstructured-pytesseract==0.3.15
# via unstructured
urllib3==2.5.0
urllib3==2.6.2
# via requests
uvicorn==0.38.0
uvicorn==0.40.0
# via -r requirements/base.in
webencodings==0.5.1
# via html5lib
Expand Down
4 changes: 1 addition & 3 deletions requirements/constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
#
# pip-compile --no-strip-extras ./requirements/constraints.in
#
anyio==4.11.0
anyio==4.12.0
# via starlette
idna==3.11
# via anyio
numpy==1.26.4
# via -r requirements/constraints.in
sniffio==1.3.1
# via anyio
starlette==0.41.2
# via -r requirements/constraints.in
typing-extensions==4.15.0
Expand Down
Loading
Loading