diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml index b5ffaa8f670..012a95a76c6 100644 --- a/.github/workflows/docs-deploy.yml +++ b/.github/workflows/docs-deploy.yml @@ -40,6 +40,11 @@ jobs: with: repository: lancedb/lance-ray path: lance-ray + - name: Checkout lance-huggingface + uses: actions/checkout@v4 + with: + repository: lance-format/lance-huggingface + path: lance-huggingface - name: Checkout lance-namespace-impls uses: actions/checkout@v4 with: @@ -84,6 +89,12 @@ jobs: cat >> docs/src/integrations/.pages << 'EOF' - Ray: ray EOF + - name: Copy lance-huggingface docs + run: | + cp -r lance-huggingface/docs/src docs/src/integrations/huggingface + cat >> docs/src/integrations/.pages << 'EOF' + - HuggingFace: huggingface + EOF - name: Copy contributing docs run: | mkdir -p docs/src/community/project-specific/lance diff --git a/docs/src/integrations/huggingface.md b/docs/src/integrations/huggingface.md deleted file mode 100644 index 5e5a66e7363..00000000000 --- a/docs/src/integrations/huggingface.md +++ /dev/null @@ -1,15 +0,0 @@ -# HuggingFace Integration - -The HuggingFace Hub has become the go to place for ML practitioners to find pre-trained models and useful datasets. - -HuggingFace datasets can be written directly into Lance format by using the -`lance.write_dataset` method. You can write the entire dataset or a particular split. For example: - -```python -import datasets # pip install datasets -import lance - -lance.write_dataset(datasets.load_dataset( - "poloclub/diffusiondb", split="train[:10]", -), "diffusiondb_train.lance") -``` \ No newline at end of file