diff --git a/README.md b/README.md
index 5ed89dc7..c6df123a 100644
--- a/README.md
+++ b/README.md
@@ -31,11 +31,11 @@ Additionally it provides:
 
 ## 1. Deployment
 
-For a simple up-and-running MLX with asset catalog only, we created a [Quickstart Guide](./quickstart) using [Docker Compose](https://docs.docker.com/compose/install/).
+For a simple up-and-running MLX with asset catalog only, we created a [Quickstart Guide](./bootstrapper) using [Docker Compose](https://docs.docker.com/compose/install/).
 
 For a full deployment, we use [Kubeflow Kfctl](https://github.com/kubeflow/kfctl) tooling.
 
-* #### [MLX using Docker Compose (Asset Catalog Only)](./quickstart)
+* #### [MLX using Docker Compose (Asset Catalog Only)](./bootstrapper)
 
 * #### [MLX Deployment on Kubernetes or OpenShift](./docs/mlx-setup.md)
diff --git a/api/README.md b/api/README.md
index 12b4f6cc..518ebaa2 100644
--- a/api/README.md
+++ b/api/README.md
@@ -4,7 +4,7 @@ An extension to the Kubeflow Pipeline API for Components and Models
 
 ---
 
-# Quickstart 
+# Quickstart
 
 ## Deploy to Kubernetes
 
@@ -100,7 +100,7 @@ Bring up the Quickstart without the `mlx-api` service, since we will run the MLX
 from our local source code, instead of using the pre-built Docker image
 `mlexchange/mlx-api:nightly-main`.
 
     # cd
-    cd quickstart
+    cd bootstrapper
 
     docker compose --project-name no_api up minio miniosetup mysql mlx-ui
@@ -177,5 +177,5 @@ After testing or debugging your code changes, bring down the Swagger Server
 to populate the MLX asset catalog
 
     # cd
-    cd quickstart
+    cd bootstrapper
 
     ./init_catalog.sh
diff --git a/api/examples/catalog_api.py b/api/examples/catalog_api.py
index 85593982..9b9857d4 100644
--- a/api/examples/catalog_api.py
+++ b/api/examples/catalog_api.py
@@ -23,7 +23,7 @@
 
 api_base_path = 'apis/v1alpha1'
 
-catalog_upload_file = "./../../bootstrapper/catalog_upload.json"
+catalog_upload_file = "../../bootstrapper/catalog_upload.json"
 
 IBM_GHE_API_TOKEN = env.get("IBM_GHE_API_TOKEN")
diff --git a/bootstrapper/Dockerfile b/bootstrapper/Dockerfile
deleted file mode 100644
index 4d27b5f9..00000000
--- a/bootstrapper/Dockerfile
+++ /dev/null
@@ -1,12 +0,0 @@
-FROM python:3.7-slim
-
-RUN apt-get update && apt-get install -y git
-
-RUN pip install requests ruamel.yaml https://storage.googleapis.com/ml-pipeline/release/0.1.21/kfp.tar.gz ai_pipeline_params
-
-ENV APP_HOME /app
-COPY . $APP_HOME
-WORKDIR $APP_HOME
-
-ENTRYPOINT ["python"]
-CMD ["start.py"]
diff --git a/bootstrapper/README.md b/bootstrapper/README.md
index ddd4c034..56cc233d 100644
--- a/bootstrapper/README.md
+++ b/bootstrapper/README.md
@@ -1,12 +1,121 @@
-# How to use it
-1. Install MLX
-2. Get the [github.ibm.com personal access token](https://github.ibm.com/settings/tokens/new) and give it access to read all public repos.
-3. Fillin the below environment variables in [bootstrap.yaml](bootstrap.yaml):
-   - **enterprise_github_token**: github.ibm.com personal access token from step 2.
-4. Deploy boostrapper:
-   ```shell
-   kubectl apply -f bootstrapper/bootstrap.yaml -n kubeflow
-   kubectl apply -f bootstrapper/configmap.yaml -n kubeflow
-   ```
-
-   After 2-5 minutes, the assets in [configmap.yaml](configmap.yaml) should be populated.
+# Bootstrapper
+
+## Running MLX with Docker Compose
+
+The "quickstart" setup uses [Docker Compose](https://docs.docker.com/compose/)
+to bring up the MLX API server and the MLX Dashboard, together with the MySQL
+database and Minio S3 storage backend.
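+
+Once the repository is cloned (see the prerequisites below), the services this
+Compose file brings up can be listed with the Docker Compose v2 CLI; the exact
+service names are whatever the current `docker-compose.yaml` declares:
+
+    docker compose config --services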
+
+In this configuration, the preloaded asset catalog of Components, Datasets, Models,
+Notebooks, and Pipelines can be browsed, asset metadata and sample code can be
+downloaded, and new assets can be registered. Sample pipeline code can be generated
+for each asset type; however, executing it on a Kubeflow Pipelines (KFP) cluster
+is not enabled.
+
+In a Kubernetes cluster deployment of MLX, _Pipelines_ are registered using
+the KFP API, and metadata storage is managed by KFP. In this Docker Compose setup,
+the _Pipelines_ are stored in Minio and MySQL by the MLX API server.
+
+## Limitations
+
+The _Kubeflow Pipelines_ dashboard and _Inference Service_ capabilities are not
+available with this Docker Compose setup.
+
+## Prerequisites
+
+* Install [Docker Compose](https://docs.docker.com/compose/install/).
+* It may be necessary to increase the [Docker resources](https://docs.docker.com/docker-for-mac/#resources)
+  from the default of 2 GB memory to 4 GB.
+* Approximately 10 GB of free storage is required.
+
+Clone this repository and navigate to the `bootstrapper` folder:
+
+    git clone https://github.com/machine-learning-exchange/mlx.git
+    cd mlx/bootstrapper
+
+## Keep up to date
+
+If some time has passed since the `mlx` repository was cloned,
+make sure to pull the latest sources:
+
+    git pull
+
+## Pull the Docker Images
+
+Our Docker images for the [mlx-api](https://hub.docker.com/r/mlexchange/mlx-api/tags?name=nightly)
+and [mlx-ui](https://hub.docker.com/r/mlexchange/mlx-ui/tags?name=nightly)
+get rebuilt nightly. To get the latest versions, run:
+
+    docker compose pull
+
+## Bring up the Docker Containers
+
+    docker compose up
+
+Wait for the containers to start up. When the MLX API and UI are ready, this
+message should show up in the terminal log:
+
+```Markdown
+dashboard_1 |
+dashboard_1 | ================================================
+dashboard_1 | Open the MLX Dashboard at http://localhost:80/
+dashboard_1 | ================================================
+dashboard_1 |
+```
+
+Now open a web browser and type `localhost` in the address bar to open the MLX
+dashboard.
+
+The MLX API spec can be explored at `localhost:8080/apis/v1alpha1/ui/`.
+
+**Note:** If the Docker Compose stack is running on a remote host, and the
+MLX web UI is running on `localhost`, export the environment
+variable `DOCKER_HOST_IP` so that the MLX UI web app on `localhost` can connect
+to the MLX API on the Docker host:
+
+    export DOCKER_HOST_IP=127.0.0.1
+    docker compose up
+
+## Shut Down the Docker Containers
+
+Press `control` + `c` in the terminal to stop the containers, then remove them:
+
+    docker compose down -v
+
+## Remove the Data Created by Minio and MySQL
+
+    docker volume prune -f
+
+## Troubleshooting
+
+If you are working on a local clone of your fork, rather than a clone of the source
+repository, make sure to keep your code up to date:
+
+    git remote add upstream https://github.com/machine-learning-exchange/mlx.git
+    git fetch upstream
+    git checkout main
+    git rebase upstream/main
+    git push origin main --force
+
+Since we are actively developing MLX, there may have been changes to the data schema
+that conflict with data created by an earlier run of the Quickstart.
+Typical symptoms are empty dashboards with endlessly spinning wheels.
+To remove all previously created Docker Compose data, run the following commands:
+
+    docker compose down -v --remove-orphans
+    docker compose rm -v -f
+    docker volume prune -f
+
+### Windows Subsystem for Linux (WSL) Issues
+
+#### Featured Assets Pages are Empty
+
+If no featured asset cards show up in the MLX web UI and the Docker Compose log
+shows an error like this:
+
+    catalog_1 | /bin/sh: /init_catalog.sh: not found
+    catalog_1 exited with code 127
+
+Make sure you originally cloned/forked the source repo from inside WSL, not from
+Windows. This error happens because the MLX source files have Windows line endings
+(`\r\n`, CRLF), which the shell cannot execute (see
+https://askubuntu.com/questions/966488/how-do-i-fix-r-command-not-found-errors-running-bash-scripts-in-wsl#comment1553686_966488).
+This error in the `catalog_1` service prevents the loading of assets and objects
+into the MLX catalog.
diff --git a/bootstrapper/bootstrap.yaml b/bootstrapper/bootstrap.yaml
deleted file mode 100644
index c8a4bc46..00000000
--- a/bootstrapper/bootstrap.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 2021 The MLX Contributors
-#
-# SPDX-License-Identifier: Apache-2.0
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: bootstrap
-  namespace: kubeflow
-spec:
-  backoffLimit: 5
-  template:
-    spec:
-      containers:
-      - name: mlx-bootstrap
-        image: aipipeline/bootstrap:latest
-        imagePullPolicy: Always
-        env:
-        - name: internal_github_raw_url
-          value: https://raw.githubusercontent.com/machine-learning-exchange/mlx/main/
-        - name: enterprise_github_token
-          value: ''
-        - name: mlx_api
-          value: mlx-api
-        - name: cleanup
-          value: 'false'
-        volumeMounts:
-        - name: mlx-config
-          mountPath: /etc/config.json
-          subPath: config.json
-      restartPolicy: Never
-      volumes:
-      - name: mlx-config
-        configMap:
-          name: mlx-config
diff --git a/bootstrapper/configmap.yaml b/bootstrapper/configmap.yaml
deleted file mode 100644
index 2d7bee31..00000000
--- a/bootstrapper/configmap.yaml
+++ /dev/null
@@ -1,162 +0,0 @@
-# Copyright 2021 The MLX Contributors
-#
-# SPDX-License-Identifier: Apache-2.0
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: mlx-config
-  namespace: kubeflow
-data:
-  config.json: |
-    {
-    "pipelines":[
-    ],
-    "components": [
-      {
-        "name": "Train Spark Model - IBM Cloud",
-        "source": "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/spark/train_spark/component.yaml"
-      },
-      {
-        "name": "Serve PyTorch Model - Seldon Core",
-        "source": "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/ffdl/serve/component.yaml"
-      },
-      {
-        "name": "Deploy Model - Watson Machine Learning",
-        "source": "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/watson/deploy/component.yaml"
-      },
-      {
-        "name": "Train Model - Fabric for Deep Learning",
-        "source": "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/ffdl/train/component.yaml"
-      },
-      {
-        "name": "Model Robustness Check - PyTorch",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/metrics/robustness_checker/component.yaml"
-      },
-      {
-        "name": "Model Fairness Check - PyTorch",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/metrics/bias_detector/component.yaml"
-      },
-      {
-        "name": "Deploy Model - Kubernetes",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/kubernetes/kube_deployment/component.yaml"
"https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/kubernetes/kube_deployment/component.yaml" - }, - { - "name": "Deploy Model - Knative", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/knative/knative_container_deployment/component.yaml" - }, - { - "name": "Subscribe - Watson OpenScale", - "source": "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/watson/manage/subscribe/component.yaml" - }, - { - "name": "Store model - Watson Machine Learning", - "source": "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/ibm-components/watson/store/component.yaml" - }, - { - "name": "Jupyter", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/jupyter/component.yaml" - } - ], - "models": [ - { - "name": "MAX Audio Classifier", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-audio-classifier.yaml" - }, - { - "name": "MAX Breast Cancer Mitosis Detector", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-breast-cancer-mitosis-detector.yaml" - }, - { - "name": "MAX Facial Age Estimator", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-facial-age-estimator.yaml" - }, - { - "name": "MAX Image Caption Generator", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator.yaml" - }, - { - "name": "Image Completer", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/image-completer.yaml" - }, - { - "name": "MAX Image Segmenter", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-segmenter.yaml" - }, - { - "name": "MAX News Text Generator", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-news-text-generator.yaml" - }, - { - "name": "MAX Object Detector", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector.yaml" - }, - { - "name": "MAX Sports Video Classifier", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-sports-video-classifier.yaml" - }, - { - "name": "MAX Weather Forecaster", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-weather-forecaster.yaml" - } - ], - "notebooks": [ - { - "name": "AIF360 Gender Classification", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/aif360-gender.yaml" - }, - { - "name": "ART detector model", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-detector.yaml" - }, - { - "name": "ART poisoning attack", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-poison.yaml" - }, - { - "name": "AIF360 Bias detection example", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/aif-bias.yaml" - }, - { - "name": "Watson OpenScale", - "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/watson-openscale.yaml" - }, - { - "name": "Watson OpenScale 
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/OpenScale.yaml"
-      },
-      {
-        "name": "Train and deploy with Watson Machine Learning",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/watson-ml.yaml"
-      }
-    ],
-    "datasets": [
-      {
-        "name": "Thematic Clustering",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering.yaml"
-      },
-      {
-        "name": "Finance Proposition Bank",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/fpb.yaml"
-      },
-      {
-        "name": "Groningen Meaning Bank",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/gmb.yaml"
-      },
-      {
-        "name": "NOAA Weather Data",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/jfk.yaml"
-      },
-      {
-        "name": "PubLayNet",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/publaynet.yaml"
-      },
-      {
-        "name": "PubTabNet",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/pubtabnet.yaml"
-      },
-      {
-        "name": "TensorFlow Speech Commands",
-        "source": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/tsc.yaml"
-      }
-    ]
-    }
diff --git a/quickstart/docker-compose.yaml b/bootstrapper/docker-compose.yaml
similarity index 100%
rename from quickstart/docker-compose.yaml
rename to bootstrapper/docker-compose.yaml
diff --git a/bootstrapper/init_catalog.sh b/bootstrapper/init_catalog.sh
new file mode 100644
index 00000000..1bb2ef19
--- /dev/null
+++ b/bootstrapper/init_catalog.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+
+# Copyright 2021 The MLX Contributors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# If no options are given, use environment variable
+MLX_API_SERVER=${MLX_API_SERVER:-'localhost:80'}
+
+# Print full usage details
+print_usage () {
+  # Max output line length is 80, arbitrarily.
+  echo 'Usage: init_catalog.sh [OPTION...]'
+  echo '  --host=HOST    Use the specified server host. Must be given alongside --port.'
+  echo '  --port=PORT    Use the specified server port. Must be given alongside --host.'
+  echo '  --disable-run  Disable Inference Services & Kubeflow Pipeline execution.'
+  echo '                 Necessary when using Docker Compose instead of Kubernetes.'
+  echo '  --help         Display this help text & exit.'
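+  # The example invocations below are illustrative only: localhost:8080 matches
+  # the port mapping of this repo's Docker Compose setup, but any reachable
+  # MLX API address works.
+  echo
+  echo 'Examples:'
+  echo '  ./init_catalog.sh --host=localhost --port=8080 --disable-run'
+  echo '  MLX_API_SERVER=localhost:8080 ./init_catalog.sh'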
+  echo
+  echo 'If either the host or the port is not specified, the entire address defaults to'
+  echo "MLX_API_SERVER ($MLX_API_SERVER)"
+  exit 0
+}
+
+# Handle user input for server & port
+HOST=''
+PORT=''
+DISABLE_RUN=''
+while [ -n "$1" ]; do
+  case $1 in
+    --host=*) HOST="${1#???????}" ;;  # "${var:start}" syntax is not a POSIX feature
+    --port=*) PORT="${1#???????}" ;;
+    --disable-run) DISABLE_RUN=1 ;;
+    --help) print_usage ;;
+    *)
+      echo "init_catalog.sh: Option \"$1\" not recognized" >&2
+      echo "For more info, try \"init_catalog.sh --help\"" >&2
+      exit 1 ;;
+  esac
+  shift
+done
+
+# Build URL from arguments if all are specified
+if [ -n "$HOST" ] && [ -n "$PORT" ]; then
+  MLX_API_ADDRESS=$HOST:$PORT
+  echo "API address: $MLX_API_ADDRESS"
+else
+  MLX_API_ADDRESS=$MLX_API_SERVER
+  echo "API server or port not specified; using MLX_API_SERVER ($MLX_API_SERVER)"
+fi
+
+MLX_API_URL="http://$MLX_API_ADDRESS/apis/v1alpha1"
+
+
+# Main functionality
+
+# Wait for the MLX API server, but more importantly the MySQL server, to be ready
+until curl -X GET -H 'Accept: application/json' -s "$MLX_API_URL/health_check?check_database=true&check_object_store=true" | grep -q 'Healthy'; do
+  echo 'Waiting for MLX-API, Minio, MySQL ...'
+  sleep 1
+done
+
+# Echo the MLX API server status (should be "Healthy")
+curl -X GET -H 'Accept: application/json' -s "$MLX_API_URL/health_check?check_database=true&check_object_store=true"
+
+# Upload the pipeline asset catalog
+curl -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' -d @catalog_upload.json -s "$MLX_API_URL/catalog" | grep -iE "total_|error"
+
+# [Obsolete] Mark all the catalog assets as approved and featured
+# for asset_type in components datasets models notebooks pipelines; do
+#   curl -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' -d '["*"]' -s "$MLX_API_URL/$asset_type/publish_approved"
+#   curl -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' -d '["*"]' -s "$MLX_API_URL/$asset_type/featured"
+# done
+
+if [ -n "$DISABLE_RUN" ]; then
+  # Disable the Inference Services, since we don't have a KFP cluster
+  curl -X PUT -H 'Content-Type: application/json' -H 'Accept: application/json' -d '{"Inference Services": false}' -s "$MLX_API_URL/settings" -o /dev/null --show-error
+
+  # Disable pipeline execution, since we don't have a KFP cluster
+  curl -X PUT -H 'Content-Type: application/json' -H 'Accept: application/json' -d '{"Execution enabled": false}' -s "$MLX_API_URL/settings" -o /dev/null --show-error
+fi
diff --git a/quickstart/init_db.sql b/bootstrapper/init_db.sql
similarity index 100%
rename from quickstart/init_db.sql
rename to bootstrapper/init_db.sql
diff --git a/bootstrapper/start.py b/bootstrapper/start.py
deleted file mode 100644
index 919b14ef..00000000
--- a/bootstrapper/start.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# Copyright 2021 The MLX Contributors
-#
-# SPDX-License-Identifier: Apache-2.0
-import json
-import os
-import requests
-import subprocess
-import tarfile
-import shutil
-
-
-internal_github_raw_url = os.getenv("internal_github_raw_url", "https://raw.githubusercontent.com/machine-learning-exchange/mlx/main/")
-api_url = os.getenv("mlx_api", "mlx-api")
-token = os.getenv("enterprise_github_token", "")
-repo_name = 'mlx'
-asset_categories = ['pipelines', 'components', 'models', 'notebooks', 'datasets']
-
-
-def get_github_files(asset_name, asset_list):
-    os.makedirs(asset_name, exist_ok=True)
-    for asset in asset_list:
-        if token:
-            headers = {
-                'Accept': 'application/vnd.github.v3.raw',
-                'Authorization': 'token ' + token
-            }
-        else:
-            headers = {}
-        if '://' not in asset['source'] and token:
-            r = requests.get(internal_github_raw_url + asset['source'], headers=headers)
-        elif 'raw.github.ibm.com' in asset['source'] and token:
-            r = requests.get(asset['source'], headers=headers)
-        elif '://' in asset['source']:
-            r = requests.get(asset['source'])
-        else:
-            continue
-
-        if asset_name != 'components':
-            filename = os.path.basename(asset['source'])
-        else:
-            filename = asset['name'].replace(" ", "-") + '.yaml'
-        with open(os.path.join(asset_name, filename), "w") as file:
-            file.write(r.text)
-        asset['download'] = 'true'
-
-
-def upload_asset(asset_name, asset_list):
-    for asset in asset_list:
-        if asset.get('download', '') == 'true':
-            if asset_name != 'components':
-                filename = os.path.basename(asset['source'])
-            else:
-                filename = asset['name'].replace(" ", "-") + '.yaml'
-            tarname = filename.replace(".yaml", ".tgz")
-            tarfile_path = os.path.join(asset_name, tarname)
-            with tarfile.open(tarfile_path, "w:gz") as tar:
-                tar.add(os.path.join(asset_name, filename), arcname=filename)
-                tar.close()
-            params = {
-                'name': asset.get('name', '')
-            }
-            if asset_name == 'notebooks' and '://' not in asset['source'] and token:
-                data = {
-                    'enterprise_github_token': token
-                }
-            else:
-                data = {}
-            with open(os.path.join(asset_name, tarname), 'rb') as f:
-                r = requests.post("http://" + api_url + '/apis/v1alpha1/' + asset_name + '/upload', params=params, files={'uploadfile': f}, data=data)
-                print(r.text)
-
-
-def cleanup_assets(asset_name):
-    r = requests.delete("http://" + api_url + '/apis/v1alpha1/' + asset_name + '/*')
-    print(r.text)
-
-
-def get_github_dir_files(asset_name, asset_list):
-    os.makedirs(asset_name, exist_ok=True)
-    if token:
-        headers = {
-            'Accept': 'application/vnd.github.v3.raw',
-            'Authorization': 'token ' + token
-        }
-        internal_github_url = internal_github_raw_url.replace('raw.', token + '@').replace('/master/', '')
-        command = ['git', 'clone', internal_github_url, repo_name]
-        subprocess.run(command, check=True)
-    for asset in asset_list:
-        if '://' not in asset['source'] and token:
-            shutil.copytree(repo_name + '/' + asset['source'], asset_name + '/' + asset['name'].replace(" ", "-"))
-            asset['url'] = internal_github_url + '/' + asset['source']
-            asset['download'] = 'true'
-        elif '://' in asset['source']:
-            source_pieces = asset['source'].split('/')
-            github_url = '/'.join(source_pieces[0:5])
-            github_repo = source_pieces[4]
-            source_dir = '/'.join(source_pieces[7:])
-            command = ['git', 'clone', github_url, github_repo]
-            if github_repo not in os.listdir('.'):
-                subprocess.run(command, check=True)
-            shutil.copytree(github_repo + '/' + source_dir, asset_name + '/' + asset['name'].replace(" ", "-"))
-            asset['url'] = asset['source']
-            asset['download'] = 'true'
-
-
-def upload_dir_asset(asset_name, asset_list):
-    for asset in asset_list:
-        if asset.get('download', '') == 'true':
-            dirname = asset['name'].replace(" ", "-")
-            tarname = dirname + '.tgz'
-            tarfile_path = os.path.join(asset_name, tarname)
-            with tarfile.open(tarfile_path, "w:gz") as tar:
-                for filename in os.listdir(os.path.join(asset_name, dirname)):
-                    if filename.endswith('.yaml') or filename.endswith('.yml'):
-                        tar.add(os.path.join(asset_name, dirname, filename), arcname=filename)
-                tar.close()
-            with open(os.path.join(asset_name, tarname), 'rb') as f:
-                params = {
-                    'name': asset.get('name', ''),
-                    'url': asset.get('url', '')
-                }
-                r = requests.post("http://" + api_url + '/apis/v1alpha1/' + asset_name + '/upload', files={'uploadfile': f}, params=params)
-                print(r.text)
-
-
-def feature_default_assets():
-    for category in asset_categories:
-        data = ['*']
-        r = requests.post("http://" + api_url + '/apis/v1alpha1/' + category + '/publish_approved', json=data)
-        print(r.text)
-        r = requests.post("http://" + api_url + '/apis/v1alpha1/' + category + '/featured', json=data)
-        print(r.text)
-
-
-if __name__ == '__main__':
-    with open("/etc/config.json", "r") as f:
-        samples = json.load(f)
-        f.close()
-    if os.getenv('cleanup', '') == 'true':
-        for category in asset_categories:
-            cleanup_assets(category)
-
-    get_github_files('pipelines', samples['pipelines'])
-    get_github_files('components', samples['components'])
-    get_github_files('models', samples['models'])
-    get_github_files('notebooks', samples['notebooks'])
-    get_github_files('datasets', samples['datasets'])
-
-    if api_url:
-        for asset in samples['pipelines']:
-            if asset.get('download', '') == 'true':
-                filename = os.path.basename(asset['source'])
-                tarname = filename + '.tar.gz'
-                command = ['dsl-compile', '--py', os.path.join('pipelines', filename), '--output', os.path.join('pipelines', tarname)]
-                subprocess.run(command, check=True)
-                with open(os.path.join('pipelines', tarname), 'rb') as f:
-                    params = {
-                        'name': asset.get('name', ''),
-                        'description': asset.get('description', '')
-                    }
-                    data = {
-                        'annotations': json.dumps(asset.get('annotations', {}))
-                    }
-                    r = requests.post("http://" + api_url + '/apis/v1alpha1/pipelines/upload', files={'uploadfile': f}, params=params, data=data)
-                    print(r.text)
-
-    upload_asset('components', samples['components'])
-    upload_asset('models', samples['models'])
-    upload_asset('notebooks', samples['notebooks'])
-    upload_asset('datasets', samples['datasets'])
-    feature_default_assets()
diff --git a/dashboard/origin-mlx/README.md b/dashboard/origin-mlx/README.md
index c90f1900..e0161a17 100644
--- a/dashboard/origin-mlx/README.md
+++ b/dashboard/origin-mlx/README.md
@@ -95,7 +95,7 @@ from our local source code, instead of using the pre-built Docker image `mlexcha
 
 ```Bash
 # cd
-cd quickstart
+cd bootstrapper
 
 docker compose --project-name no_ui up minio miniosetup mysql mlx-api catalog
 ```
diff --git a/manifests/read-only-k8s/init-jobs.yaml b/manifests/read-only-k8s/init-jobs.yaml
index 52fb0b29..1caddf8a 100644
--- a/manifests/read-only-k8s/init-jobs.yaml
+++ b/manifests/read-only-k8s/init-jobs.yaml
@@ -54,7 +54,7 @@ spec:
         - -c
         - |
           cd /tmp &&
-          curl -L --output init_catalog.sh https://raw.githubusercontent.com/machine-learning-exchange/mlx/main/quickstart/init_catalog.sh &&
+          curl -L --output init_catalog.sh https://raw.githubusercontent.com/machine-learning-exchange/mlx/main/bootstrapper/init_catalog.sh &&
           curl -L --output catalog_upload.json https://raw.githubusercontent.com/machine-learning-exchange/mlx/main/bootstrapper/catalog_upload.json &&
           chmod 777 init_catalog.sh &&
           ./init_catalog.sh
diff --git a/quickstart/README.md b/quickstart/README.md
deleted file mode 100644
index a23aa334..00000000
--- a/quickstart/README.md
+++ /dev/null
@@ -1,119 +0,0 @@
-# Running MLX with Docker Compose
-
-This _"quickstart"_ setup uses [Docker Compose](https://docs.docker.com/compose/)
-to bring up the MLX API server and the MLX Dashboard, together with the MySQL
-database and Minio S3 storage backend.
-
-In this configuration, the preloaded asset catalog of Components, Datasets, Models,
-Notebooks and Pipelines can be browsed, asset metadata and sample code can be
-downloaded and new assets can be registered. Sample pipeline code can be generated
-for each asset type, however their execution on a Kubeflow Pipelines (KFP) cluster
-is not enabled.
-In a Kubernetes cluster deployment of MLX, _Pipelines_ are registered using
-the KFP API and metadata storage is managed by KFP. In this Docker Compose setup
-the _Pipelines_ are stored in Minio and MySQL by the MLX API server.
-
-## Limitations
-
-The _Kubeflow Pipelines_ dashboard and _Inference Service_ capabilities are not
-available with this Docker Compose setup.
-
-## Prerequisites
-
-* Install [Docker Compose](https://docs.docker.com/compose/install/).
-* It may be necessary to increase the [Docker resources](https://docs.docker.com/docker-for-mac/#resources) from the
-default of 2 GB memory to 4 GB.
-* Approximately 10 GB of free storage
-
-Clone this repository and navigate to the `quickstart` folder:
-
-    git clone https://github.com/machine-learning-exchange/mlx.git
-    cd mlx/quickstart
-
-## Keep up to date
-
-If some time has passed since the `mlx` repository was cloned,
-make sure to pull the latest sources for the _Quickstart_:
-
-    git pull
-
-## Pull the Docker Images
-
-Our Docker images for the [mlx-api](https://hub.docker.com/r/mlexchange/mlx-api/tags?name=nightly)
-and [mlx-ui](https://hub.docker.com/r/mlexchange/mlx-ui/tags?name=nightly)
-get rebuilt nightly. To get the latest version, run:
-
-    docker compose pull
-
-## Bring up the Docker Containers
-
-    docker compose up
-
-Wait for the containers to start up. When the MLX API and UI are ready, this
-message should show up in the terminal log:
-
-```Markdown
-dashboard_1 |
-dashboard_1 | ================================================
-dashboard_1 | Open the MLX Dashboard at http://localhost:80/
-dashboard_1 | ================================================
-dashboard_1 |
-```
-
-Now open a web browser and type `localhost` in the address bar to open the MLX
-dashboard.
-
-The MLX API spec can be explored at `localhost:8080/apis/v1alpha1/ui/`
-
-**Note:** If the Docker compose stack is running on a remote host, and the
-MLX Web UI is running on `localhost`, export the environment
-variable `DOCKER_HOST_IP`, so that the MLX UI web app on `localhost` can connect
-to the MLX API on the Docker host.
-
-    export DOCKER_HOST_IP=127.0.0.1
-    docker compose up
-
-## Shut Down the Docker Containers
-
-Press `control` + `c` on the Terminal to stop and then remove the containers:
-
-    docker compose down -v
-
-## Remove the Data Created by Minio and MySQL
-
-    docker volume prune -f
-
-## Troubleshooting
-
-If you are working on a local clone of your fork, rather than a clone of the source
-repository, make sure to keep your code up to date:
-
-    git remote add upstream https://github.com/machine-learning-exchange/mlx.git
-    git fetch upstream
-    git checkout main
-    git rebase upstream/main
-    git push origin main --force
-
-Since we are actively developing MLX, there may have been changes to the data schema
-which could conflict with the data created by running the Quickstart in days prior.
-The symptoms of this could be empty dashboards with endlessly spinning wheels.
-To remove all previously created Docker Compose data run the following commands:
-
-    docker compose down -v --remove-orphans
-    docker compose rm -v -f
-    docker volume prune -f
-
-### Windows Subsystem for Linux (WSL) Issues
-
-#### Featured Assets Pages are Empty
-
-If there are no featured asset cards showing up in the MLX web UI and the Docker Compose log shows an error like this:
-
-    catalog_1 | /bin/sh: /init_catalog.sh: not found
-    catalog_1 exited with code 127
-
-Make sure you originally cloned/forked the source repo from inside the WSL sub-system, not Windows. This error happens
-because the MLX source files have Windows line endings (`\r\n` - CRLF) which `bash` cannot run.
-(https://askubuntu.com/questions/966488/how-do-i-fix-r-command-not-found-errors-running-bash-scripts-in-wsl#comment1553686_966488).
-This error in the `catalog_1` service prevents the loading of assets and objects into the MLX catalog.
-
-
diff --git a/quickstart/catalog_upload.json b/quickstart/catalog_upload.json
deleted file mode 100644
index 7a5dc91f..00000000
--- a/quickstart/catalog_upload.json
+++ /dev/null
@@ -1,196 +0,0 @@
-{
-  "components": [
-    {
-      "name": "Create Secret - Kubernetes Cluster",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/create-secret/component.yaml"
-    },
-    {
-      "name": "Generate Dataset Metadata",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dax-to-dlf/component.yaml"
-    },
-    {
-      "name": "Create Dataset Volume",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/dlf/component.yaml"
-    },
-    {
-      "name": "Echo Sample",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/echo/component.yaml"
-    },
-    {
-      "name": "Kubernetes Model Deploy",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/kube-model-deployment/component.yaml"
-    },
-    {
-      "name": "Create Model Config",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/component-samples/model-config/component.yaml"
-    },
-    {
-      "name": "Model Fairness Check",
-      "url": "https://raw.githubusercontent.com/Trusted-AI/AIF360/master/mlops/kubeflow/bias_detector_pytorch/component.yaml"
-    },
-    {
-      "name": "Adversarial Robustness Evaluation",
-      "url": "https://raw.githubusercontent.com/Trusted-AI/adversarial-robustness-toolbox/main/utils/mlops/kubeflow/robustness_evaluation_fgsm_pytorch/component.yaml"
-    }
-  ],
-  "datasets": [
-    {
-      "name": "Project CodeNet",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet/codenet.yaml"
-    },
-    {
-      "name": "Project CodeNet - Language Classifier",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet_langclass/codenet_langclass.yaml"
-    },
-    {
-      "name": "Project CodeNet - MLM",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/codenet_mlm/codenet_mlm.yaml"
-    },
-    {
-      "name": "Finance Proposition Bank",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/fpb/fpb.yaml"
-    },
-    {
-      "name": "Groningen Meaning Bank",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/gmb/gmb.yaml"
-    },
-    {
-      "name": "NOAA Weather Data",
-      "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/jfk/jfk.yaml"
"https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/jfk/jfk.yaml" - }, - { - "name": "PubLayNet", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/publaynet/publaynet.yaml" - }, - { - "name": "PubTabNet", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/pubtabnet/pubtabnet.yaml" - }, - { - "name": "Thematic Clustering", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/thematic_clustering/thematic_clustering.yaml" - }, - { - "name": "TensorFlow Speech Commands", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/dataset-samples/tsc/tsc.yaml" - } - ], - "models": [ - { - "name": "CodeNet Language Classification", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/codenet-language-classification/codenet-language-classification.yaml" - }, - { - "name": "Human Pose Estimator", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-human-pose-estimator/max-human-pose-estimator.yaml" - }, - { - "name": "Image Caption Generator", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-caption-generator/max-image-caption-generator.yaml" - }, - { - "name": "Image Resolution Enhancer", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-image-resolution-enhancer/max-image-resolution-enhancer.yaml" - }, - { - "name": "Named Entity Tagger", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-named-entity-tagger/max-named-entity-tagger.yaml" - }, - { - "name": "Object Detector", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-object-detector/max-object-detector.yaml" - }, - { - "name": "Optical Character Recognition", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-ocr/max-ocr.yaml" - }, - { - "name": "Question Answering", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-question-answering/max-question-answering.yaml" - }, - { - "name": "Recommender System", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-recommender/max-recommender.yaml" - }, - { - "name": "Text Sentiment Classifier", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-text-sentiment-classifier/max-text-sentiment-classifier.yaml" - }, - { - "name": "Toxic Comment Classifier", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-toxic-comment-classifier/max-toxic-comment-classifier.yaml" - }, - { - "name": "Weather Forecaster", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/model-samples/max-weather-forecaster/max-weather-forecaster.yaml" - } - ], - "notebooks": [ - { - "name": "JFK Airport Analysis", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/JFK-airport.yaml" - }, - { - "name": "AIF360 Bias detection example", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/aif-bias.yaml" - }, - { - "name": "ART detector model", - "url": 
"https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-detector.yaml" - }, - { - "name": "ART poisoning attack", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/art-poison.yaml" - }, - { - "name": "Project CodeNet - Language Classification", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/codenet-lang.yaml" - }, - { - "name": "Project CodeNet - MLM", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/codenet-mlm.yaml" - }, - { - "name": "Qiskit Quantum Kernel Machine Learning", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/qiskit-ml.yaml" - }, - { - "name": "Qiskit Neural Network Classifier and Regressor", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/notebook-samples/qiskit-nncr.yaml" - } - ], - "pipelines": [ - { - "name": "Parallel Join", - "url": "https://github.com/kubeflow/kfp-tekton/blob/master/sdk/python/tests/compiler/testdata/parallel_join.yaml" - }, - { - "name": "Sequential Pipeline", - "url": "https://github.com/kubeflow/kfp-tekton/blob/master/sdk/python/tests/compiler/testdata/sequential.yaml" - }, - { - "name": "ResourceOp Basic", - "url": "https://github.com/kubeflow/kfp-tekton/blob/master/sdk/python/tests/compiler/testdata/resourceop_basic.yaml" - }, - { - "name": "Calculation Pipeline", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/calculation-pipeline.yaml" - }, - { - "name": "Katib Early Stopping Experiment", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/katib-pipeline.yaml" - }, - { - "name": "Nested Pipeline", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/nested-pipeline.yaml" - }, - { - "name": "Trusted AI Pipeline", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/trusted-ai-pipeline.yaml" - }, - { - "name": "Watson Machine Learning", - "url": "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/pipeline-samples/wml-pipeline.yaml" - } - ] -} diff --git a/quickstart/init_catalog.sh b/quickstart/init_catalog.sh deleted file mode 100755 index f8b479f7..00000000 --- a/quickstart/init_catalog.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh - -# Copyright 2021 The MLX Contributors -# -# SPDX-License-Identifier: Apache-2.0 - -MLX_API_SERVER=${MLX_API_SERVER:-"localhost:8080"} -MLX_API_URL="http://${MLX_API_SERVER}/apis/v1alpha1" - -# wait for the MLX API server, but more importantly the MySQL server to be ready -until curl -X GET -H 'Accept: application/json' -s "${MLX_API_URL}/health_check?check_database=true&check_object_store=true" | grep -q 'Healthy'; do - echo 'Waiting for MLX-API, Minio, MySQL ...' 
-    sleep 1
-done
-
-# echo the MLX API server status, should be "Healthy"
-curl -X GET -H 'Accept: application/json' -s "${MLX_API_URL}/health_check?check_database=true&check_object_store=true"
-
-# upload the pipeline asset catalog
-curl -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' -d @catalog_upload.json -s "${MLX_API_URL}/catalog" | grep -iE "total_|error"
-
-# mark all the catalog assets as approved and featured
-for asset_type in components datasets models notebooks pipelines; do
-    curl -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' -d '["*"]' -s "${MLX_API_URL}/$asset_type/publish_approved"
-    curl -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' -d '["*"]' -s "${MLX_API_URL}/$asset_type/featured"
-done
-
-# disable the Inference Services, since we don't have a KFP cluster
-curl -X PUT -H 'Content-Type: application/json' -H 'Accept: application/json' -d '{"Inference Services": false}' -s "${MLX_API_URL}/settings" -o /dev/null --show-error
-
-# disable pipeline execution, since we don't have a KFP cluster
-curl -X PUT -H 'Content-Type: application/json' -H 'Accept: application/json' -d '{"Execution enabled": false}' -s "${MLX_API_URL}/settings" -o /dev/null --show-error
diff --git a/tools/python/regenerate_catalog_upload_json.py b/tools/python/regenerate_catalog_upload_json.py
index 43483461..cca79060 100755
--- a/tools/python/regenerate_catalog_upload_json.py
+++ b/tools/python/regenerate_catalog_upload_json.py
@@ -27,10 +27,7 @@
 katalog_dir = f"{project_dir}/../katalog"  # TODO: don't assume user cloned katalog and mlx repos into same parent folder
 katalog_url = "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/"
 
-catalog_upload_json_files = [
-    f"{project_dir}/bootstrapper/catalog_upload.json",
-    f"{project_dir}/quickstart/catalog_upload.json",
-]
+catalog_upload_json = "bootstrapper/catalog_upload.json"
 
 
 def get_list_of_yaml_files_in_katalog(asset_type: str):
@@ -45,7 +42,7 @@ def get_list_of_yaml_files_in_katalog(asset_type: str):
 
 def generate_katalog_dict() -> dict:
 
-    katalog_dict = dict()
+    katalog_dict = {}
 
     for asset_type in asset_types:
 
@@ -73,31 +70,23 @@ def generate_katalog_dict() -> dict:
     return katalog_dict
 
 
-def rewrite_catalog_upload_json_files(katalog: dict):
-
-    for file_path in catalog_upload_json_files:
-
-        with open(file_path, "w") as output_file:
+def rewrite_catalog_upload_json(katalog: dict):
 
-            print("  - " + relpath(file_path, project_dir))
+    with open(f"{project_dir}/{catalog_upload_json}", "w") as output_file:
 
-            output_file.write(json.dumps(katalog, sort_keys=False, indent=2))
-            output_file.write("\n")
+        print(f"  - {catalog_upload_json}")
 
+        output_file.write(json.dumps(katalog, sort_keys=False, indent=2))
+        output_file.write("\n")
 
-def main():
-    print("Regenerating catalog_upload.json files:")
+if __name__ == "__main__":
 
-    # TODO: read current catalog_upload.json file(s) to capture non-katalog assets and restore later
+    print("Regenerating catalog_upload.json:")
 
-    katalog_dict = generate_katalog_dict()
+    # TODO: read current catalog_upload.json file to capture non-katalog assets and restore later
 
-    rewrite_catalog_upload_json_files(katalog_dict)
+    # Generate new catalog_upload.json
+    rewrite_catalog_upload_json(generate_katalog_dict())
 
     print("Done. Use git diff to evaluate if and which changes are desired!")
-
-
-if __name__ == '__main__':
-
-    main()