From 78246ef8a3ef8565c788a017b182f7c7622115a7 Mon Sep 17 00:00:00 2001 From: Han Date: Sat, 16 May 2020 22:01:57 +0800 Subject: [PATCH 1/7] remove payload field from table sys.segments --- .../sql/calcite/schema/SystemSchema.java | 103 ++++++++---------- .../src/views/segments-view/segments-view.tsx | 14 +-- 2 files changed, 47 insertions(+), 70 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java index c6bff43ac075..fb8d3a8ad129 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java @@ -19,7 +19,6 @@ package org.apache.druid.sql.calcite.schema; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JavaType; import com.fasterxml.jackson.databind.ObjectMapper; @@ -56,7 +55,6 @@ import org.apache.druid.discovery.NodeRole; import org.apache.druid.indexer.TaskStatusPlus; import org.apache.druid.indexing.overlord.supervisor.SupervisorStatus; -import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.java.util.http.client.Request; @@ -143,7 +141,6 @@ public class SystemSchema extends AbstractSchema .add("is_available", ValueType.LONG) .add("is_realtime", ValueType.LONG) .add("is_overshadowed", ValueType.LONG) - .add("payload", ValueType.STRING) .build(); static final RowSignature SERVERS_SIGNATURE = RowSignature @@ -294,37 +291,31 @@ public Enumerable scan(DataContext root) final FluentIterable publishedSegments = FluentIterable .from(() -> getAuthorizedPublishedSegments(metadataStoreSegments, root)) .transform(val -> { - try { - final DataSegment segment = val.getDataSegment(); - segmentsAlreadySeen.add(segment.getId()); - final PartialSegmentData partialSegmentData = partialSegmentDataMap.get(segment.getId()); - long numReplicas = 0L, numRows = 0L, isRealtime = 0L, isAvailable = 0L; - if (partialSegmentData != null) { - numReplicas = partialSegmentData.getNumReplicas(); - numRows = partialSegmentData.getNumRows(); - isAvailable = partialSegmentData.isAvailable(); - isRealtime = partialSegmentData.isRealtime(); - } - return new Object[]{ - segment.getId(), - segment.getDataSource(), - segment.getInterval().getStart().toString(), - segment.getInterval().getEnd().toString(), - segment.getSize(), - segment.getVersion(), - Long.valueOf(segment.getShardSpec().getPartitionNum()), - numReplicas, - numRows, - IS_PUBLISHED_TRUE, //is_published is true for published segments - isAvailable, - isRealtime, - val.isOvershadowed() ? IS_OVERSHADOWED_TRUE : IS_OVERSHADOWED_FALSE, - jsonMapper.writeValueAsString(val) - }; - } - catch (JsonProcessingException e) { - throw new RE(e, "Error getting segment payload for segment %s", val.getDataSegment().getId()); + final DataSegment segment = val.getDataSegment(); + segmentsAlreadySeen.add(segment.getId()); + final PartialSegmentData partialSegmentData = partialSegmentDataMap.get(segment.getId()); + long numReplicas = 0L, numRows = 0L, isRealtime = 0L, isAvailable = 0L; + if (partialSegmentData != null) { + numReplicas = partialSegmentData.getNumReplicas(); + numRows = partialSegmentData.getNumRows(); + isAvailable = partialSegmentData.isAvailable(); + isRealtime = partialSegmentData.isRealtime(); } + return new Object[]{ + segment.getId(), + segment.getDataSource(), + segment.getInterval().getStart().toString(), + segment.getInterval().getEnd().toString(), + segment.getSize(), + segment.getVersion(), + Long.valueOf(segment.getShardSpec().getPartitionNum()), + numReplicas, + numRows, + IS_PUBLISHED_TRUE, //is_published is true for published segments + isAvailable, + isRealtime, + val.isOvershadowed() ? IS_OVERSHADOWED_TRUE : IS_OVERSHADOWED_FALSE + }; }); final FluentIterable availableSegments = FluentIterable @@ -333,33 +324,27 @@ public Enumerable scan(DataContext root) root )) .transform(val -> { - try { - if (segmentsAlreadySeen.contains(val.getKey())) { - return null; - } - final PartialSegmentData partialSegmentData = partialSegmentDataMap.get(val.getKey()); - final long numReplicas = partialSegmentData == null ? 0L : partialSegmentData.getNumReplicas(); - return new Object[]{ - val.getKey(), - val.getKey().getDataSource(), - val.getKey().getInterval().getStart().toString(), - val.getKey().getInterval().getEnd().toString(), - val.getValue().getSegment().getSize(), - val.getKey().getVersion(), - (long) val.getValue().getSegment().getShardSpec().getPartitionNum(), - numReplicas, - val.getValue().getNumRows(), - IS_PUBLISHED_FALSE, // is_published is false for unpublished segments - // is_available is assumed to be always true for segments announced by historicals or realtime tasks - IS_AVAILABLE_TRUE, - val.getValue().isRealtime(), - IS_OVERSHADOWED_FALSE, // there is an assumption here that unpublished segments are never overshadowed - jsonMapper.writeValueAsString(val.getKey()) - }; - } - catch (JsonProcessingException e) { - throw new RE(e, "Error getting segment payload for segment %s", val.getKey()); + if (segmentsAlreadySeen.contains(val.getKey())) { + return null; } + final PartialSegmentData partialSegmentData = partialSegmentDataMap.get(val.getKey()); + final long numReplicas = partialSegmentData == null ? 0L : partialSegmentData.getNumReplicas(); + return new Object[]{ + val.getKey(), + val.getKey().getDataSource(), + val.getKey().getInterval().getStart().toString(), + val.getKey().getInterval().getEnd().toString(), + val.getValue().getSegment().getSize(), + val.getKey().getVersion(), + (long) val.getValue().getSegment().getShardSpec().getPartitionNum(), + numReplicas, + val.getValue().getNumRows(), + IS_PUBLISHED_FALSE, // is_published is false for unpublished segments + // is_available is assumed to be always true for segments announced by historicals or realtime tasks + IS_AVAILABLE_TRUE, + val.getValue().isRealtime(), + IS_OVERSHADOWED_FALSE // there is an assumption here that unpublished segments are never overshadowed + }; }); final Iterable allSegments = Iterables.unmodifiableIterable( diff --git a/web-console/src/views/segments-view/segments-view.tsx b/web-console/src/views/segments-view/segments-view.tsx index a7d4e398d6dc..474e67215fa5 100644 --- a/web-console/src/views/segments-view/segments-view.tsx +++ b/web-console/src/views/segments-view/segments-view.tsx @@ -146,7 +146,6 @@ interface SegmentQueryResultRow { version: string; size: 0; partition_num: number; - payload: any; num_rows: number; num_replicas: number; is_available: number; @@ -211,6 +210,7 @@ export class SegmentsView extends React.PureComponent { - try { - result.payload = JSON.parse(result.payload); - } catch { - result.payload = {}; - } - }); return results; }, onStateChange: ({ result, loading, error }) => { @@ -299,7 +292,6 @@ export class SegmentsView extends React.PureComponent Date: Sun, 17 May 2020 09:01:50 +0800 Subject: [PATCH 2/7] update doc --- docs/querying/sql.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/querying/sql.md b/docs/querying/sql.md index 80f8a399bf33..3171f830f2ef 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -986,7 +986,6 @@ Segments table provides details on all Druid segments, whether they are publishe |is_available|LONG|Boolean is represented as long type where 1 = true, 0 = false. 1 if this segment is currently being served by any process(Historical or realtime). See the [Architecture page](../design/architecture.md#segment-lifecycle) for more details.| |is_realtime|LONG|Boolean is represented as long type where 1 = true, 0 = false. 1 if this segment is _only_ served by realtime tasks, and 0 if any historical process is serving this segment.| |is_overshadowed|LONG|Boolean is represented as long type where 1 = true, 0 = false. 1 if this segment is published and is _fully_ overshadowed by some other published segments. Currently, is_overshadowed is always false for unpublished segments, although this may change in the future. You can filter for segments that "should be published" by filtering for `is_published = 1 AND is_overshadowed = 0`. Segments can briefly be both published and overshadowed if they were recently replaced, but have not been unpublished yet. See the [Architecture page](../design/architecture.md#segment-lifecycle) for more details.| -|payload|STRING|JSON-serialized data segment payload| For example to retrieve all segments for datasource "wikipedia", use the query: From 2d10d48f6fca65482a660fb100b913d2d2e932d4 Mon Sep 17 00:00:00 2001 From: Han Date: Sun, 17 May 2020 10:05:21 +0800 Subject: [PATCH 3/7] fix test --- .../org/apache/druid/sql/calcite/schema/SystemSchemaTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java index 80bf83030f35..0a7c804efc59 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java @@ -467,7 +467,7 @@ public void testGetTableMap() final RelDataType rowType = segmentsTable.getRowType(new JavaTypeFactoryImpl()); final List fields = rowType.getFieldList(); - Assert.assertEquals(14, fields.size()); + Assert.assertEquals(13, fields.size()); final SystemSchema.TasksTable tasksTable = (SystemSchema.TasksTable) schema.getTableMap().get("tasks"); final RelDataType sysRowType = tasksTable.getRowType(new JavaTypeFactoryImpl()); From 4bc81881364b88ed764bb14e9847b6801ae94181 Mon Sep 17 00:00:00 2001 From: Han Date: Mon, 8 Jun 2020 08:40:49 +0800 Subject: [PATCH 4/7] fix CI failure --- .../test/resources/results/auth_test_sys_schema_segments.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json b/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json index a169cfe87361..7b67022b8422 100644 --- a/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json +++ b/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json @@ -12,7 +12,6 @@ "is_published": 1, "is_available": 1, "is_realtime": 0, - "is_overshadowed": 0, - "payload": "{\"overshadowed\":false,\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"load spec is pruned, because it's not needed on Brokers, but eats a lot of heap space\":\"\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}" + "is_overshadowed": 0 } ] From a36fe1cc0b5a87184992d22a83597a6461ad37fd Mon Sep 17 00:00:00 2001 From: Han Date: Mon, 22 Jun 2020 18:45:45 +0800 Subject: [PATCH 5/7] add necessary fields --- .../results/auth_test_sys_schema_segments.json | 5 ++++- .../druid/sql/calcite/schema/SystemSchema.java | 13 +++++++++++-- .../druid/sql/calcite/schema/SystemSchemaTest.java | 7 ++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json b/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json index 7b67022b8422..d59b14fa98b4 100644 --- a/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json +++ b/integration-tests/src/test/resources/results/auth_test_sys_schema_segments.json @@ -12,6 +12,9 @@ "is_published": 1, "is_available": 1, "is_realtime": 0, - "is_overshadowed": 0 + "is_overshadowed": 0, + "shardSpec": "NoneShardSpec", + "dimensions": "[anonymous, area_code, city, continent_code, country_name, dma_code, geo, language, namespace, network, newpage, page, postal_code, region_lookup, robot, unpatrolled, user]", + "metrics": "[added, count, deleted, delta, delta_hist, unique_users, variation]" } ] diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java index fb8d3a8ad129..6daab43ee288 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java @@ -141,6 +141,9 @@ public class SystemSchema extends AbstractSchema .add("is_available", ValueType.LONG) .add("is_realtime", ValueType.LONG) .add("is_overshadowed", ValueType.LONG) + .add("shardSpec", ValueType.STRING) + .add("dimensions", ValueType.STRING) + .add("metrics", ValueType.STRING) .build(); static final RowSignature SERVERS_SIGNATURE = RowSignature @@ -314,7 +317,10 @@ public Enumerable scan(DataContext root) IS_PUBLISHED_TRUE, //is_published is true for published segments isAvailable, isRealtime, - val.isOvershadowed() ? IS_OVERSHADOWED_TRUE : IS_OVERSHADOWED_FALSE + val.isOvershadowed() ? IS_OVERSHADOWED_TRUE : IS_OVERSHADOWED_FALSE, + segment.getShardSpec(), + segment.getDimensions(), + segment.getMetrics() }; }); @@ -343,7 +349,10 @@ public Enumerable scan(DataContext root) // is_available is assumed to be always true for segments announced by historicals or realtime tasks IS_AVAILABLE_TRUE, val.getValue().isRealtime(), - IS_OVERSHADOWED_FALSE // there is an assumption here that unpublished segments are never overshadowed + IS_OVERSHADOWED_FALSE, // there is an assumption here that unpublished segments are never overshadowed + null, + null, + null }; }); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java index 0a7c804efc59..d48fd138a1d8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java @@ -88,6 +88,7 @@ import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.SegmentWithOvershadowedStatus; import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.apache.druid.timeline.partition.ShardSpec; import org.easymock.EasyMock; import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpResponse; @@ -467,7 +468,7 @@ public void testGetTableMap() final RelDataType rowType = segmentsTable.getRowType(new JavaTypeFactoryImpl()); final List fields = rowType.getFieldList(); - Assert.assertEquals(13, fields.size()); + Assert.assertEquals(16, fields.size()); final SystemSchema.TasksTable tasksTable = (SystemSchema.TasksTable) schema.getTableMap().get("tasks"); final RelDataType sysRowType = tasksTable.getRowType(new JavaTypeFactoryImpl()); @@ -1243,6 +1244,10 @@ private static void verifyTypes(final List rows, final RowSignature si case STRING: if (signature.getColumnName(i).equals("segment_id")) { expectedClass = SegmentId.class; + } else if (signature.getColumnName(i).equals("shardSpec")) { + expectedClass = ShardSpec.class; + } else if (signature.getColumnName(i).equals("dimensions") || signature.getColumnName(i).equals("metrics")) { + expectedClass = List.class; } else { expectedClass = String.class; } From b713308d75d926275475f9db28ce00485cf9058d Mon Sep 17 00:00:00 2001 From: Han Date: Mon, 22 Jun 2020 19:08:22 +0800 Subject: [PATCH 6/7] fix doc --- docs/querying/sql.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/querying/sql.md b/docs/querying/sql.md index 3171f830f2ef..239427494d1c 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -986,6 +986,9 @@ Segments table provides details on all Druid segments, whether they are publishe |is_available|LONG|Boolean is represented as long type where 1 = true, 0 = false. 1 if this segment is currently being served by any process(Historical or realtime). See the [Architecture page](../design/architecture.md#segment-lifecycle) for more details.| |is_realtime|LONG|Boolean is represented as long type where 1 = true, 0 = false. 1 if this segment is _only_ served by realtime tasks, and 0 if any historical process is serving this segment.| |is_overshadowed|LONG|Boolean is represented as long type where 1 = true, 0 = false. 1 if this segment is published and is _fully_ overshadowed by some other published segments. Currently, is_overshadowed is always false for unpublished segments, although this may change in the future. You can filter for segments that "should be published" by filtering for `is_published = 1 AND is_overshadowed = 0`. Segments can briefly be both published and overshadowed if they were recently replaced, but have not been unpublished yet. See the [Architecture page](../design/architecture.md#segment-lifecycle) for more details.| +|shardSpec|STRING|The toString of specific `ShardSpec`| +|dimensions|STRING|The dimensions of the segment| +|metrics|STRING|The metrics of the segment| For example to retrieve all segments for datasource "wikipedia", use the query: From 89a0d6b59d907bf520c6db81525bc0dba8b464a2 Mon Sep 17 00:00:00 2001 From: Han Date: Mon, 29 Jun 2020 11:32:34 +0800 Subject: [PATCH 7/7] fix comment --- .../org/apache/druid/sql/calcite/schema/SystemSchema.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java index 6daab43ee288..72633a1097ed 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java @@ -350,9 +350,9 @@ public Enumerable scan(DataContext root) IS_AVAILABLE_TRUE, val.getValue().isRealtime(), IS_OVERSHADOWED_FALSE, // there is an assumption here that unpublished segments are never overshadowed - null, - null, - null + val.getValue().getSegment().getShardSpec(), + val.getValue().getSegment().getDimensions(), + val.getValue().getSegment().getMetrics() }; });