38 changes: 37 additions & 1 deletion backend/app/routers/datasets.py
@@ -41,7 +41,12 @@
from app.routers.files import add_file_entry, add_local_file_entry, remove_file_entry
from app.routers.licenses import delete_license
from app.search.connect import delete_document_by_id
from app.search.index import index_dataset, index_file
from app.search.index import (
    index_dataset,
    index_file,
    index_folder,
    remove_folder_index,
)
from beanie import PydanticObjectId
from beanie.odm.operators.update.general import Inc
from beanie.operators import And, Or
@@ -341,6 +346,13 @@ async def edit_dataset(

# Update entry to the dataset index
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)

# Also refresh the folder index entries, since they denormalize the dataset's downloads and status
async for folder in FolderDB.find(
FolderDB.dataset_id == PydanticObjectId(dataset_id)
):
await index_folder(es, FolderOut(**folder.dict()), update=True)

return dataset.dict()
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")

@@ -379,6 +391,13 @@ async def patch_dataset(

# Update entry to the dataset index
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)

# Also refresh the folder index entries, since they denormalize the dataset's downloads and status
async for folder in FolderDB.find(
FolderDB.dataset_id == PydanticObjectId(dataset_id)
):
await index_folder(es, FolderOut(**folder.dict()), update=True)

return dataset.dict()


@@ -423,6 +442,7 @@ async def add_folder(
dataset_id: str,
folder_in: FolderIn,
user=Depends(get_current_user),
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
allow: bool = Depends(Authorization("uploader")),
):
if (await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
@@ -436,6 +456,7 @@
**folder_in.dict(), creator=user, dataset_id=PydanticObjectId(dataset_id)
)
await new_folder.insert()
await index_folder(es, FolderOut(**new_folder.dict()))
return new_folder.dict()
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")

@@ -595,9 +616,11 @@ async def _delete_nested_folders(parent_folder_id):
await remove_file_entry(file.id, fs, es)
await _delete_nested_folders(subfolder.id)
await subfolder.delete()
await remove_folder_index(subfolder.id, es)

await _delete_nested_folders(folder_id)
await folder.delete()
await remove_folder_index(folder.id, es)
return {"deleted": folder_id}
else:
raise HTTPException(status_code=404, detail=f"Folder {folder_id} not found")
@@ -623,6 +646,7 @@ async def patch_folder(
dataset_id: str,
folder_id: str,
folder_info: FolderPatch,
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
user=Depends(get_current_user),
allow: bool = Depends(Authorization("editor")),
):
@@ -640,6 +664,8 @@
folder.parent_folder = folder_info.parent_folder
folder.modified = datetime.datetime.utcnow()
await folder.save()
await index_folder(es, FolderOut(**folder.dict()), update=True)

return folder.dict()
else:
raise HTTPException(status_code=404, detail=f"Folder {folder_id} not found")
@@ -894,6 +920,7 @@ async def create_dataset_from_zip(
@router.get("/{dataset_id}/download", response_model=DatasetOut)
async def download_dataset(
dataset_id: str,
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
user=Depends(get_current_user),
fs: Minio = Depends(dependencies.get_fs),
allow: bool = Depends(Authorization("viewer")),
@@ -1043,6 +1070,15 @@ async def download_dataset(
response.headers["Content-Disposition"] = "attachment; filename=%s" % zip_name
# Increment download count
await dataset.update(Inc({DatasetDB.downloads: 1}))

# Reindex the dataset so the updated download count is searchable
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
# Also refresh the folder index entries, since they denormalize the dataset's downloads and status
async for folder in FolderDB.find(
FolderDB.dataset_id == PydanticObjectId(dataset_id)
):
await index_folder(es, FolderOut(**folder.dict()), update=True)

return response
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")

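A side note on the hunks above: edit_dataset, patch_dataset, and download_dataset now repeat the same folder-reindex loop. A minimal consolidation sketch, assuming a shared helper in app/search/index.py (the name reindex_dataset_and_folders is hypothetical, not part of this PR):

```python
from elasticsearch import Elasticsearch

from app.models.datasets import DatasetDB, DatasetOut
from app.models.folders import FolderDB, FolderOut
from app.search.index import index_dataset, index_folder


async def reindex_dataset_and_folders(es: Elasticsearch, dataset: DatasetDB):
    """Refresh the dataset's search entry, then every folder entry that
    denormalizes the dataset's downloads and status fields."""
    await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
    async for folder in FolderDB.find(FolderDB.dataset_id == dataset.id):
        await index_folder(es, FolderOut(**folder.dict()), update=True)
```

Each call site would then reduce to a single await reindex_dataset_and_folders(es, dataset).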
9 changes: 9 additions & 0 deletions backend/app/routers/files.py
@@ -290,6 +290,7 @@ async def download_file(
file_id: str,
version: Optional[int] = None,
increment: Optional[bool] = True,
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
fs: Minio = Depends(dependencies.get_fs),
allow: bool = Depends(FileAuthorization("viewer")),
):
@@ -340,6 +341,10 @@
if increment:
# Increment download count
await file.update(Inc({FileDB.downloads: 1}))

# Reindex the file so the updated download count is searchable
await index_file(es, FileOut(**file.dict()), update=True)

return response

else:
@@ -351,6 +356,7 @@ async def download_file_url(
file_id: str,
version: Optional[int] = None,
expires_in_seconds: Optional[int] = 3600,
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
external_fs: Minio = Depends(dependencies.get_external_fs),
allow: bool = Depends(FileAuthorization("viewer")),
):
@@ -392,6 +398,9 @@
# Increment download count
await file.update(Inc({FileDB.downloads: 1}))

# Reindex the file so the updated download count is searchable
await index_file(es, FileOut(**file.dict()), update=True)

# return presigned url
return {"presigned_url": presigned_url}
else:
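Both download endpoints above rebuild the full file document through index_file just to pick up one counter. If update_record forwards its partial {"doc": ...} body to Elasticsearch's update API (its usage in app/search/index.py suggests it does, but that is an assumption), a narrower patch would avoid re-deriving the whole entry. A sketch, with the helper name hypothetical:

```python
from elasticsearch import Elasticsearch

from app.config import settings
from app.models.files import FileDB
from app.search.connect import update_record


async def bump_downloads_in_index(es: Elasticsearch, file_id):
    """Patch only the stale downloads field instead of rebuilding the whole
    ElasticsearchEntry. Assumes update_record forwards the partial
    {"doc": ...} body to Elasticsearch's update API."""
    file = await FileDB.get(file_id)  # re-read so downloads reflects the Inc()
    update_record(
        es,
        settings.elasticsearch_index,
        {"doc": {"downloads": file.downloads}},
        file.id,
    )
```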
12 changes: 12 additions & 0 deletions backend/app/routers/public_datasets.py
@@ -14,10 +14,12 @@
from app.models.folders import FolderDB, FolderDBViewList, FolderOut
from app.models.metadata import MetadataDB, MetadataDefinitionDB, MetadataOut
from app.models.pages import Paged, _construct_page_metadata, _get_page_query
from app.search.index import index_dataset, index_folder
from beanie import PydanticObjectId
from beanie.odm.operators.update.general import Inc
from beanie.operators import And, Or
from bson import ObjectId, json_util
from elasticsearch import Elasticsearch
from fastapi import APIRouter, Depends, Form, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer
@@ -217,6 +219,7 @@ async def get_dataset_metadata(
@router.get("/{dataset_id}/download", response_model=DatasetOut)
async def download_dataset(
dataset_id: str,
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
fs: Minio = Depends(dependencies.get_fs),
):
if (dataset := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
@@ -370,6 +373,15 @@ async def download_dataset(
)
# Increment download count
await dataset.update(Inc({DatasetDB.downloads: 1}))

# Reindex the dataset so the updated download count is searchable
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
# Also refresh the folder index entries, since they denormalize the dataset's downloads and status
async for folder in FolderDB.find(
FolderDB.dataset_id == PydanticObjectId(dataset_id)
):
await index_folder(es, FolderOut(**folder.dict()), update=True)

return response
else:
raise HTTPException(
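The public download route repeats the same dataset-plus-folders reindex as the authenticated one, so the hypothetical reindex_dataset_and_folders helper sketched under datasets.py would cover this call site as well:

```python
# With the hypothetical helper sketched earlier:
await reindex_dataset_and_folders(es, dataset)
```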
52 changes: 49 additions & 3 deletions backend/app/search/index.py
@@ -1,16 +1,18 @@
from typing import List, Optional
from typing import List, Optional, Union

from app.config import settings
from app.models.authorization import AuthorizationDB
from app.models.datasets import DatasetOut
from app.models.datasets import DatasetDB, DatasetOut
from app.models.files import FileDB, FileOut
from app.models.folders import FolderOut
from app.models.metadata import MetadataDB
from app.models.search import ElasticsearchEntry
from app.models.thumbnails import ThumbnailDB
from app.search.connect import insert_record, update_record
from app.search.connect import delete_document_by_id, insert_record, update_record
from beanie import PydanticObjectId
from bson import ObjectId
from elasticsearch import Elasticsearch, NotFoundError
from fastapi import HTTPException


async def index_dataset(
@@ -112,6 +114,50 @@ async def index_file(
insert_record(es, settings.elasticsearch_index, doc, file.id)


async def index_folder(
    es: Elasticsearch,
    folder: FolderOut,
    user_ids: Optional[List[str]] = None,
    update: bool = False,
):
    """Create or update an Elasticsearch entry for the folder."""
    # Find the dataset this folder belongs to
    if (
        dataset := await DatasetDB.find_one(
            DatasetDB.id == PydanticObjectId(folder.dataset_id)
        )
    ) is not None:
        downloads = dataset.downloads
        status = dataset.status
    else:
        raise HTTPException(
            status_code=404, detail="Orphan folder doesn't belong to any dataset."
        )

    doc = ElasticsearchEntry(
        resource_type="folder",
        name=folder.name,
        creator=folder.creator.email,
        created=folder.created,
        dataset_id=str(folder.dataset_id),
        folder_id=str(folder.id),
        downloads=downloads,
        status=status,
    ).dict()

    if update:
        try:
            update_record(es, settings.elasticsearch_index, {"doc": doc}, folder.id)
        except NotFoundError:
            insert_record(es, settings.elasticsearch_index, doc, folder.id)
    else:
        insert_record(es, settings.elasticsearch_index, doc, folder.id)


async def remove_folder_index(folder_id: Union[str, ObjectId], es: Elasticsearch):
    delete_document_by_id(es, settings.elasticsearch_index, str(folder_id))


async def index_thumbnail(
es: Elasticsearch,
thumbnail_id: str,
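The update-then-insert fallback in index_folder mirrors the existing index helpers. If the connect-layer wrappers ever expose the Elasticsearch client directly, the same upsert is a single call with doc_as_upsert. A sketch, assuming direct client access and elasticsearch-py 7.x-style arguments:

```python
from elasticsearch import Elasticsearch

from app.config import settings


def upsert_doc(es: Elasticsearch, doc: dict, doc_id: str):
    # Update the document if it exists, insert it otherwise. On
    # elasticsearch-py 8.x the equivalent call is
    # es.update(index=..., id=doc_id, doc=doc, doc_as_upsert=True).
    es.update(
        index=settings.elasticsearch_index,
        id=doc_id,
        body={"doc": doc, "doc_as_upsert": True},
    )
```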
6 changes: 4 additions & 2 deletions frontend/src/components/search/PublicSearch.tsx
@@ -164,18 +164,19 @@ export function PublicSearch() {
{luceneOn ? (
<ReactiveList
componentId="results"
dataField="_score"
dataField="created"
size={20}
pagination={true}
react={{
and: ["string-searchbox"],
}}
render={({ data }) => <PublicSearchResult data={data} />}
sortBy="desc"
/>
) : (
<ReactiveList
componentId="results"
dataField="_score"
dataField="created"
size={20}
pagination={true}
react={{
@@ -189,6 +190,7 @@
render={({ data }) => {
return <PublicSearchResult data={data} />;
}}
sortBy="desc"
/>
)}
</ReactiveBase>
29 changes: 28 additions & 1 deletion frontend/src/components/search/PublicSearchResult.tsx
@@ -14,6 +14,7 @@ import { parseDate } from "../../utils/common";
import { theme } from "../../theme";

import parse from "html-react-parser";
import FolderIcon from "@mui/icons-material/Folder";

// Function to parse the elastic search parameter
// If it contains HTML tags like <mark>, it removes them
@@ -52,6 +53,28 @@ function buildDatasetResult(item) {
);
}

function buildFolderResult(item) {
  return (
    <>
      <ListItemAvatar sx={{ color: theme.palette.primary.main }}>
        <FolderIcon />
      </ListItemAvatar>
      <Box sx={{ marginTop: "5px" }}>
        <MuiLink
          component={Link}
          to={`/public_datasets/${item.dataset_id}?folder=${item._id}`}
          sx={{ fontWeight: "bold", fontSize: "18px" }}
        >
          {parseString(item.name)}
        </MuiLink>
        <Typography variant="body2" color={theme.palette.info.main}>
          Created by {parseString(item.creator)} at {parseDate(item.created)}
        </Typography>
      </Box>
    </>
  );
}

function buildFileResult(item) {
return (
<>
@@ -93,7 +116,11 @@ export function PublicSearchResult(props) {
<ListItem alignItems="flex-start" key={item._id}>
{item.resource_type === "dataset"
? buildDatasetResult(item)
: buildFileResult(item)}
: item.resource_type === "file"
? buildFileResult(item)
: item.resource_type === "folder"
? buildFolderResult(item)
: null}
</ListItem>
))}
</List>
6 changes: 4 additions & 2 deletions frontend/src/components/search/Search.tsx
@@ -204,18 +204,19 @@ export function Search() {
{luceneOn ? (
<ReactiveList
componentId="results"
dataField="_score"
dataField="created"
size={20}
pagination={true}
react={{
and: ["string-searchbox"],
}}
render={({ data }) => <SearchResult data={data} />}
sortBy="desc"
/>
) : (
<ReactiveList
componentId="results"
dataField="_score"
dataField="created"
size={20}
pagination={true}
react={{
@@ -230,6 +231,7 @@
render={({ data }) => {
return <SearchResult data={data} />;
}}
sortBy="desc"
/>
)}
</ReactiveBase>