Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/codespell.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Spell-checking workflow. Codespell itself is configured in .codespellrc.
---
name: Codespell

# Triggered by pushes and by pull requests.
on: [push, pull_request]

# Restrict the workflow token to read-only access to the repository contents.
permissions:
  contents: read

jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest

    steps:
      # The sources must be checked out before they can be scanned.
      - name: Checkout
        uses: actions/checkout@v4
      - name: Codespell
        uses: codespell-project/actions-codespell@v2
8 changes: 8 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Lints the code base with Ruff on every push and pull request.
---
name: Ruff

on: [push, pull_request]

# Least privilege: the job only needs to read the repository contents.
# Without this block the token gets the repository's default scopes.
permissions:
  contents: read

jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
      # The sources must be checked out before Ruff can inspect them.
      - name: Checkout
        uses: actions/checkout@v4
      - name: Ruff
        uses: chartboost/ruff-action@v1
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,15 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Ruff
.ruff_cache/

# Pyright
pyrightconfig.json

# macOS
.DS_Store

# Poetry
poetry.lock
49 changes: 47 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<h1 align="center">
Dataverse Uploader</br>
<a href="https://badge.fury.io/py/dvuploader"><img src="https://badge.fury.io/py/dvuploader.svg" alt="PyPI version" height="18"></a>
<img src="https://img.shields.io/badge/python-3.8 | 3.9 | 3.10 | 3.11-blue.svg" alt="Build Badge">
<img src="https://img.shields.io/badge/python-3.9 | 3.10 | 3.11-blue.svg" alt="Build Badge">
<img src="https://github.com/gdcc/python-dvuploader/actions/workflows/test.yml/badge.svg" alt="Build Badge">
</h1>

Expand Down Expand Up @@ -81,7 +81,7 @@ dvuploader my_file.txt my_other_file.txt \

#### Using a config file

Alternatively, you can also supply a `config` file that contains all necessary informations for the uploader. The `config` file is a JSON/YAML file that contains the following keys:
Alternatively, you can also supply a `config` file that contains all necessary information for the uploader. The `config` file is a JSON/YAML file that contains the following keys:

* `persistent_id`: Persistent identifier of the dataset to upload to.
* `dataverse_url`: URL of the Dataverse instance.
Expand Down Expand Up @@ -114,3 +114,48 @@ The `config` file can then be used as follows:
```bash
dvuploader --config-path config.yml
```

## Development

To install the development dependencies, run the following command:

```bash
pip install poetry
poetry install --with test
```

### Running tests locally

In order to test the DVUploader, you need to have a Dataverse instance running. You can start a local Dataverse instance by following these steps:

**1. Start the Dataverse instance**

```bash
docker compose \
-f ./docker/docker-compose-base.yml \
--env-file local-test.env \
up -d
```

**2. Set up the environment variables**

```bash
export BASE_URL=http://localhost:8080
export $(grep "API_TOKEN" "dv/bootstrap.exposed.env")
export DVUPLOADER_TESTING=true
```

**3. Run the test(s) with pytest**

```bash
python -m pytest -v
```

### Linting

This repository uses `ruff` to lint the code and `codespell` to check for spelling mistakes. You can run the linters with the following commands:

```bash
python -m ruff check .
python -m codespell --check-filenames
```
137 changes: 137 additions & 0 deletions docker/docker-compose-base.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Compose stack with a Dataverse instance and its supporting services
# (PostgreSQL, Solr, a mail catcher, plus one-shot init/bootstrap containers).
# Image tags and database credentials are read from the environment
# (DATAVERSE_IMAGE, CONFIGBAKER_IMAGE, POSTGRES_VERSION, SOLR_VERSION,
# DATAVERSE_DB_USER, DATAVERSE_DB_PASSWORD).
version: "2.4"
name: pydataverse

services:
  # The Dataverse application server, published on host port 8080.
  dataverse:
    container_name: "dataverse"
    hostname: dataverse
    image: ${DATAVERSE_IMAGE}
    restart: on-failure
    user: payara
    environment:
      - DATAVERSE_DB_HOST=postgres
      - DATAVERSE_DB_USER=${DATAVERSE_DB_USER}
      - DATAVERSE_DB_PASSWORD=${DATAVERSE_DB_PASSWORD}
      # One multi-line plain scalar: the continuation lines are folded into a
      # single space-separated JVM_ARGS value that selects the "fake" PID
      # provider.
      - JVM_ARGS=-Ddataverse.pid.providers=fake
          -Ddataverse.pid.default-provider=fake
          -Ddataverse.pid.fake.type=FAKE
          -Ddataverse.pid.fake.label=FakeDOIProvider
          -Ddataverse.pid.fake.authority=10.5072
          -Ddataverse.pid.fake.shoulder=FK2/
    ports:
      - "8080:8080"
    networks:
      - dataverse
    # Wait for the database and Solr to start and for the dv_initializer
    # one-shot container to exit successfully.
    depends_on:
      postgres:
        condition: service_started
      solr:
        condition: service_started
      dv_initializer:
        condition: service_completed_successfully
    volumes:
      - ${PWD}/dv/data:/dv
      - ${PWD}:/secrets
    tmpfs:
      - /dumps:mode=770,size=2052M,uid=1000,gid=1000
      - /tmp:mode=770,size=2052M,uid=1000,gid=1000
    mem_limit: 2147483648 # 2 GiB
    mem_reservation: 1024m
    privileged: false
    # Probes the version endpoint; the bootstrap service below waits for this
    # check to report healthy.
    healthcheck:
      test: curl --fail http://dataverse:8080/api/info/version || exit 1
      interval: 10s
      retries: 20
      start_period: 20s
      timeout: 240s

  # One-shot container: runs fix-fs-perms.sh for the dv data directory.
  dv_initializer:
    container_name: "dv_initializer"
    image: ${CONFIGBAKER_IMAGE}
    restart: "no"
    command:
      - sh
      - -c
      - "fix-fs-perms.sh dv"
    volumes:
      - ${PWD}/dv/data:/dv

  # PostgreSQL database, using the same credentials as the dataverse service.
  postgres:
    container_name: "postgres"
    hostname: postgres
    image: postgres:${POSTGRES_VERSION}
    restart: on-failure
    environment:
      - POSTGRES_USER=${DATAVERSE_DB_USER}
      - POSTGRES_PASSWORD=${DATAVERSE_DB_PASSWORD}
    ports:
      - "5432:5432"
    networks:
      - dataverse

  # One-shot container: runs fix-fs-perms.sh for solr and copies the contents
  # of /template into the directory that the solr service mounts as its
  # template.
  solr_initializer:
    container_name: "solr_initializer"
    image: ${CONFIGBAKER_IMAGE}
    restart: "no"
    command:
      - sh
      - -c
      - "fix-fs-perms.sh solr && cp -a /template/* /solr-template"
    volumes:
      - ${PWD}/solr/data:/var/solr
      - ${PWD}/solr/conf:/solr-template

  # Solr search index; starts only after solr_initializer has completed.
  solr:
    container_name: "solr"
    hostname: "solr"
    image: solr:${SOLR_VERSION}
    depends_on:
      solr_initializer:
        condition: service_completed_successfully
    restart: on-failure
    ports:
      - "8983:8983"
    networks:
      - dataverse
    # Pre-creates the core "collection1" from the mounted /template directory.
    command:
      - "solr-precreate"
      - "collection1"
      - "/template"
    volumes:
      - ${PWD}/solr/data:/var/solr
      - ${PWD}/solr/conf:/template

  # MailDev SMTP server; mail is stored on a tmpfs mount.
  smtp:
    container_name: "smtp"
    hostname: "smtp"
    image: maildev/maildev:2.0.5
    restart: on-failure
    expose:
      - "25" # smtp server
    environment:
      - MAILDEV_SMTP_PORT=25
      - MAILDEV_MAIL_DIRECTORY=/mail
    networks:
      - dataverse
    tmpfs:
      - /mail:mode=770,size=128M,uid=1000,gid=1000

  # One-shot container: runs bootstrap.sh with the "dev" argument once the
  # dataverse service is healthy, using dv/bootstrap.exposed.env as its env
  # file.
  bootstrap:
    container_name: "bootstrap"
    hostname: "bootstrap"
    image: ${CONFIGBAKER_IMAGE}
    restart: "no"
    networks:
      - dataverse
    volumes:
      - ${PWD}/dv/bootstrap.exposed.env:/.env
    command:
      - sh
      - -c
      - "bootstrap.sh -e /.env dev"
    depends_on:
      dataverse:
        condition: service_healthy

networks:
  dataverse:
    driver: bridge
31 changes: 31 additions & 0 deletions docker/docker-compose-test-all.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Test-runner service. The `dataverse` network and the `bootstrap` service it
# refers to are not defined in this file — presumably it is combined with
# docker-compose-base.yml (TODO confirm).
version: "2.4"
services:
  unit-tests:
    container_name: unit-tests
    image: python:${PYTHON_VERSION}-slim
    environment:
      BASE_URL: http://dataverse:8080
      # Quoted so the version stays a string (a bare 6.10 would parse as 6.1).
      DV_VERSION: "6.2"
    networks:
      - dataverse
    volumes:
      - ${PWD}:/pydataverse
      - ../dv:/dv
    command:
      - sh
      - -c
      - |
        # Fetch the API token from the env file in the mounted /dv volume.
        # The absolute path keeps this independent of the working directory.
        export $(grep "API_TOKEN" "/dv/bootstrap.exposed.env")
        # "$$" is the Compose escape for a literal "$" handed to the shell.
        export API_TOKEN_SUPERUSER=$$API_TOKEN
        cd /pydataverse

        # Run the unit tests
        python3 -m pip install --upgrade pip
        python3 -m pip install pytest pytest-cov
        python3 -m pip install -e .
        python3 -m pytest > /dv/unit-tests.log

    # Start only after the bootstrap container has exited successfully.
    depends_on:
      bootstrap:
        condition: service_completed_successfully
6 changes: 3 additions & 3 deletions dvuploader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .dvuploader import DVUploader
from .file import File
from .utils import add_directory
from .dvuploader import DVUploader # noqa: F401
from .file import File # noqa: F401
from .utils import add_directory # noqa: F401

import nest_asyncio

Expand Down
3 changes: 0 additions & 3 deletions dvuploader/checksum.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import hashlib
from enum import Enum
import os
from typing import IO, Callable

from pydantic import BaseModel, ConfigDict, Field


from enum import Enum
import hashlib


class ChecksumTypes(Enum):
Expand Down
Loading