Skip to content

Commit b7470b7

Browse files
authored
Merge pull request #3399 from vkWeb/optimize/search
Import from other channels search optimized
2 parents 70f45cd + aae3be1 commit b7470b7

File tree

21 files changed: 354 additions and 275 deletions.

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ learningactivities:
3131

3232
set-tsvectors:
3333
python contentcuration/manage.py set_channel_tsvectors
34-
python contentcuration/manage.py set_contentnode_tsvectors
34+
python contentcuration/manage.py set_contentnode_tsvectors --published
3535

3636
###############################################################
3737
# END PRODUCTION COMMANDS #####################################

contentcuration/contentcuration/debug/middleware.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

contentcuration/contentcuration/debug_panel_settings.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
from .dev_settings import * # noqa
22

3-
# These endpoints will throw an error on the django debug panel
3+
# These endpoints will throw an error on the django debug panel.
44
EXCLUDED_DEBUG_URLS = [
55
"/content/storage",
6+
7+
# Disabling sync API because as soon as the sync API gets polled
8+
# the current request data gets overwritten.
9+
# Can be removed after websockets deployment.
10+
"/api/sync",
611
]
712

813
DEBUG_PANEL_ACTIVE = True
@@ -14,10 +19,10 @@ def custom_show_toolbar(request):
1419
) # noqa F405
1520

1621

17-
# if debug_panel exists, add it to our INSTALLED_APPS
22+
# if debug_panel exists, add it to our INSTALLED_APPS.
1823
INSTALLED_APPS += ("debug_panel", "debug_toolbar", "pympler") # noqa F405
1924
MIDDLEWARE += ( # noqa F405
20-
"contentcuration.debug.middleware.CustomDebugPanelMiddleware",
25+
"debug_toolbar.middleware.DebugToolbarMiddleware",
2126
)
2227
DEBUG_TOOLBAR_CONFIG = {
2328
"SHOW_TOOLBAR_CALLBACK": custom_show_toolbar,

contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@
149149
if (this.isTopic) {
150150
return `${baseUrl}#/${this.node.id}`;
151151
}
152-
return `${baseUrl}#/${this.node.parent}/${this.node.id}`;
152+
return `${baseUrl}#/${this.node.parent_id}/${this.node.id}`;
153153
},
154154
resourcesMsg() {
155155
let count;
@@ -160,13 +160,8 @@
160160
}
161161
return this.$tr('resourcesCount', { count });
162162
},
163-
numLocations() {
164-
return this.node.location_ids.length;
165-
},
166163
goToLocationLabel() {
167-
return this.numLocations > 1
168-
? this.$tr('goToPluralLocationsAction', { count: this.numLocations })
169-
: this.$tr('goToSingleLocationAction');
164+
return this.$tr('goToSingleLocationAction');
170165
},
171166
isTopic() {
172167
return this.node.kind === ContentKindsNames.TOPIC;
@@ -189,8 +184,6 @@
189184
$trs: {
190185
tagsList: 'Tags: {tags}',
191186
goToSingleLocationAction: 'Go to location',
192-
goToPluralLocationsAction:
193-
'In {count, number} {count, plural, one {location} other {locations}}',
194187
addToClipboardAction: 'Copy to clipboard',
195188
resourcesCount: '{count, number} {count, plural, one {resource} other {resources}}',
196189
coach: 'Resource for coaches',

contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
[this.channelFilter]: true,
113113
page: this.$route.query.page || 1,
114114
exclude: this.currentChannelId,
115+
published: true,
115116
}).then(page => {
116117
this.pageCount = page.total_pages;
117118
this.channels = page.results;

contentcuration/contentcuration/tests/helpers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
from importlib import import_module
33

44
import mock
5+
from search.models import ContentNodeFullTextSearch
56

7+
from contentcuration.models import ContentNode
68
from contentcuration.models import TaskResult
79

810

@@ -39,6 +41,12 @@ def mock_class_instance(target):
3941
else:
4042
target_cls = target
4143

44+
# ContentNode's node_fts field can be handled by Django when tests
45+
# access the database but we mock it so that we don't need to query
46+
# the database. By doing so we get faster test execution.
47+
if type(target_cls) is ContentNode:
48+
target_cls.node_fts = ContentNodeFullTextSearch()
49+
4250
class MockClass(target_cls):
4351
def __new__(cls, *args, **kwargs):
4452
return mock.Mock(spec_set=cls)

contentcuration/contentcuration/tests/utils/test_cache.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
import mock
2-
from django.test import TestCase
2+
from django.test import SimpleTestCase
33

44
from ..helpers import mock_class_instance
5-
from contentcuration.models import ContentNode
65
from contentcuration.utils.cache import ResourceSizeCache
76

87

9-
class ResourceSizeCacheTestCase(TestCase):
8+
class ResourceSizeCacheTestCase(SimpleTestCase):
109
def setUp(self):
1110
super(ResourceSizeCacheTestCase, self).setUp()
12-
self.node = mock.Mock(spec_set=ContentNode())
11+
self.node = mock_class_instance("contentcuration.models.ContentNode")
1312
self.node.pk = "abcdefghijklmnopqrstuvwxyz"
1413
self.redis_client = mock_class_instance("redis.client.StrictRedis")
1514
self.cache_client = mock_class_instance("django_redis.client.DefaultClient")

contentcuration/contentcuration/tests/utils/test_nodes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
from dateutil.parser import isoparse
77
from django.db.models import F
88
from django.db.models import Max
9-
from django.test import TestCase
9+
from django.test import SimpleTestCase
1010

1111
from ..base import StudioTestCase
12-
from contentcuration.models import ContentNode
12+
from contentcuration.tests.helpers import mock_class_instance
1313
from contentcuration.utils.nodes import calculate_resource_size
1414
from contentcuration.utils.nodes import ResourceSizeHelper
1515
from contentcuration.utils.nodes import SlowCalculationError
@@ -42,10 +42,10 @@ def test_modified_since(self):
4242

4343
@mock.patch("contentcuration.utils.nodes.ResourceSizeHelper")
4444
@mock.patch("contentcuration.utils.nodes.ResourceSizeCache")
45-
class CalculateResourceSizeTestCase(TestCase):
45+
class CalculateResourceSizeTestCase(SimpleTestCase):
4646
def setUp(self):
4747
super(CalculateResourceSizeTestCase, self).setUp()
48-
self.node = mock.Mock(spec_set=ContentNode())
48+
self.node = mock_class_instance("contentcuration.models.ContentNode")
4949

5050
def assertCalculation(self, cache, helper, force=False):
5151
helper().get_size.return_value = 456

contentcuration/contentcuration/utils/publish.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
from django.core.files.storage import default_storage as storage
1919
from django.core.management import call_command
2020
from django.db.models import Count
21+
from django.db.models import Exists
2122
from django.db.models import Max
23+
from django.db.models import OuterRef
2224
from django.db.models import Q
25+
from django.db.models import Subquery
2326
from django.db.models import Sum
2427
from django.db.utils import IntegrityError
2528
from django.template.loader import render_to_string
@@ -37,6 +40,10 @@
3740
from le_utils.constants import roles
3841
from past.builtins import basestring
3942
from past.utils import old_div
43+
from search.models import ChannelFullTextSearch
44+
from search.models import ContentNodeFullTextSearch
45+
from search.utils import get_fts_annotated_channel_qs
46+
from search.utils import get_fts_annotated_contentnode_qs
4047

4148
from contentcuration import models as ccmodels
4249
from contentcuration.decorators import delay_user_storage_calculation
@@ -808,6 +815,50 @@ def fill_published_fields(channel, version_notes):
808815
channel.save()
809816

810817

818+
def sync_contentnode_and_channel_tsvectors(channel_id):
819+
"""
820+
Creates, deletes and updates tsvectors of the channel and all its content nodes
821+
to reflect the current state of channel's main tree.
822+
"""
823+
# Update or create channel tsvector entry.
824+
logging.info("Setting tsvector for channel with id {}.".format(channel_id))
825+
826+
channel = (get_fts_annotated_channel_qs()
827+
.values("keywords_tsvector", "main_tree__tree_id")
828+
.get(pk=channel_id))
829+
830+
obj, is_created = ChannelFullTextSearch.objects.update_or_create(channel_id=channel_id, defaults={"keywords_tsvector": channel["keywords_tsvector"]})
831+
del obj
832+
833+
if is_created:
834+
logging.info("Created 1 channel tsvector.")
835+
else:
836+
logging.info("Updated 1 channel tsvector.")
837+
838+
# Update or create contentnodes tsvector entry for channel_id.
839+
logging.info("Setting tsvectors for all main tree contentnodes in channel {}.".format(channel_id))
840+
841+
if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists():
842+
# First, delete nodes that are no longer in main_tree.
843+
nodes_no_longer_in_main_tree = ~Exists(ccmodels.ContentNode.objects.filter(id=OuterRef("contentnode_id"), tree_id=channel["main_tree__tree_id"]))
844+
ContentNodeFullTextSearch.objects.filter(nodes_no_longer_in_main_tree, channel_id=channel_id).delete()
845+
846+
# Now, all remaining nodes are in main_tree, so let's update them.
847+
# Update only changed nodes.
848+
node_tsv_subquery = get_fts_annotated_contentnode_qs(channel_id).filter(id=OuterRef("contentnode_id")).order_by()
849+
ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__complete=True, contentnode__changed=True).update(
850+
keywords_tsvector=Subquery(node_tsv_subquery.values("keywords_tsvector")[:1]),
851+
author_tsvector=Subquery(node_tsv_subquery.values("author_tsvector")[:1])
852+
)
853+
854+
# Insert newly created nodes.
855+
# "set_contentnode_tsvectors" command is defined in "search/management/commands" directory.
856+
call_command("set_contentnode_tsvectors",
857+
"--channel-id={}".format(channel_id),
858+
"--tree-id={}".format(channel["main_tree__tree_id"]),
859+
"--complete")
860+
861+
811862
@delay_user_storage_calculation
812863
def publish_channel(
813864
user_id,
@@ -829,8 +880,9 @@ def publish_channel(
829880
set_channel_icon_encoding(channel)
830881
kolibri_temp_db = create_content_database(channel, force, user_id, force_exercises, progress_tracker=progress_tracker)
831882
increment_channel_version(channel)
832-
mark_all_nodes_as_published(channel)
833883
add_tokens_to_channel(channel)
884+
sync_contentnode_and_channel_tsvectors(channel_id=channel.id)
885+
mark_all_nodes_as_published(channel)
834886
fill_published_fields(channel, version_notes)
835887

836888
# Attributes not getting set for some reason, so just save it here

contentcuration/contentcuration/viewsets/channel.py

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
from rest_framework.serializers import CharField
2525
from rest_framework.serializers import FloatField
2626
from rest_framework.serializers import IntegerField
27+
from search.models import ChannelFullTextSearch
28+
from search.models import ContentNodeFullTextSearch
29+
from search.utils import get_fts_search_query
2730

2831
from contentcuration.decorators import cache_no_user_data
2932
from contentcuration.models import Change
@@ -119,23 +122,15 @@ def filter_deleted(self, queryset, name, value):
119122
return queryset.filter(deleted=value)
120123

121124
def filter_keywords(self, queryset, name, value):
122-
# TODO: Wait until we show more metadata on cards to add this back in
123-
# keywords_query = self.main_tree_query.filter(
124-
# Q(tags__tag_name__icontains=value)
125-
# | Q(author__icontains=value)
126-
# | Q(aggregator__icontains=value)
127-
# | Q(provider__icontains=value)
128-
# )
129-
return queryset.annotate(
130-
# keyword_match_count=SQCount(keywords_query, field="content_id"),
131-
primary_token=primary_token_subquery,
132-
).filter(
133-
Q(name__icontains=value)
134-
| Q(description__icontains=value)
135-
| Q(pk__istartswith=value)
136-
| Q(primary_token=value.replace("-", ""))
137-
# | Q(keyword_match_count__gt=0)
138-
)
125+
search_query = get_fts_search_query(value)
126+
dash_replaced_search_query = get_fts_search_query(value.replace("-", ""))
127+
128+
channel_keywords_query = (Exists(ChannelFullTextSearch.objects.filter(
129+
Q(keywords_tsvector=search_query) | Q(keywords_tsvector=dash_replaced_search_query), channel_id=OuterRef("id"))))
130+
contentnode_search_query = (Exists(ContentNodeFullTextSearch.objects.filter(
131+
Q(keywords_tsvector=search_query) | Q(author_tsvector=search_query), channel_id=OuterRef("id"))))
132+
133+
return queryset.filter(Q(channel_keywords_query) | Q(contentnode_search_query))
139134

140135
def filter_languages(self, queryset, name, value):
141136
languages = value.split(",")

0 commit comments