From e24fb910c6175d0c20577faf9b7b6cf5f4f1f7df Mon Sep 17 00:00:00 2001 From: leventov Date: Thu, 20 Sep 2018 21:42:39 +0400 Subject: [PATCH 1/2] Improve interning in SQLMetadataSegmentManager --- .../apache/druid/client/DruidDataSource.java | 7 +++++ .../metadata/SQLMetadataSegmentManager.java | 31 ++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/apache/druid/client/DruidDataSource.java b/server/src/main/java/org/apache/druid/client/DruidDataSource.java index d280e30a5b79..ee8b574eaab2 100644 --- a/server/src/main/java/org/apache/druid/client/DruidDataSource.java +++ b/server/src/main/java/org/apache/druid/client/DruidDataSource.java @@ -23,6 +23,7 @@ import com.google.common.base.Preconditions; import org.apache.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.Collection; import java.util.Collections; import java.util.Map; @@ -63,6 +64,12 @@ public Collection getSegments() return Collections.unmodifiableCollection(idToSegmentMap.values()); } + @Nullable + public DataSegment getSegment(String segmentId) + { + return idToSegmentMap.get(segmentId); + } + public DruidDataSource addSegment(DataSegment dataSegment) { idToSegmentMap.put(dataSegment.getIdentifier(), dataSegment); diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java index c072ee6717d5..71da4e0ca9d0 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java @@ -25,8 +25,6 @@ import com.google.common.base.Throwables; import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Interner; -import com.google.common.collect.Interners; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import 
com.google.inject.Inject; @@ -82,7 +80,6 @@ @ManageLifecycle public class SQLMetadataSegmentManager implements MetadataSegmentManager { - private static final Interner DATA_SEGMENT_INTERNER = Interners.newWeakInterner(); private static final EmittingLogger log = new EmittingLogger(SQLMetadataSegmentManager.class); /** @@ -232,7 +229,7 @@ public boolean enableDatasource(final String ds) .iterator(), payload -> { try { - return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class)); + return jsonMapper.readValue(payload, DataSegment.class); } catch (IOException e) { throw new RuntimeException(e); @@ -466,10 +463,9 @@ public DataSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException { try { - return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue( - r.getBytes("payload"), - DataSegment.class - )); + return replaceWithExistingSegmentIfPresent( + jsonMapper.readValue(r.getBytes("payload"), DataSegment.class) + ); } catch (IOException e) { log.makeAlert(e, "Failed to read segment from db.").emit(); @@ -535,6 +531,25 @@ public DataSegment map(int index, ResultSet r, StatementContext ctx) } } + /** + * For the garbage collector in Java, it's better to keep new objects short-living, but once they are old enough + * (i. e. promoted to old generation), try to keep them alive. In {@link #poll()}, we fetch and deserialize all + * existing segments each time, and them replace them in {@link #dataSourcesRef}. This method allows to use already + * existing (old) segments when possible, effectively interning them a-la {@link String#intern} or {@link + * com.google.common.collect.Interner}, aiming to make the most of {@link DataSegment} objects garbage soon after + * they are deserialized and to die in young generation. It allows to avoid fragmentation of the old generation and + * full GCs. 
+ */ + private DataSegment replaceWithExistingSegmentIfPresent(DataSegment segment) + { + DruidDataSource dataSource = dataSourcesRef.get().get(segment.getDataSource()); + if (dataSource == null) { + return segment; + } + DataSegment alreadyExistingSegment = dataSource.getSegment(segment.getIdentifier()); + return alreadyExistingSegment != null ? alreadyExistingSegment : segment; + } + private String getSegmentsTable() { return dbTables.get().getSegmentsTable(); From 7edf844766f4e3597c3ae2714e777fc1a395dff5 Mon Sep 17 00:00:00 2001 From: leventov Date: Sat, 22 Sep 2018 09:49:38 +0300 Subject: [PATCH 2/2] typo --- .../org/apache/druid/metadata/SQLMetadataSegmentManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java index 71da4e0ca9d0..acf149445be8 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentManager.java @@ -534,9 +534,9 @@ public DataSegment map(int index, ResultSet r, StatementContext ctx) /** * For the garbage collector in Java, it's better to keep new objects short-living, but once they are old enough * (i. e. promoted to old generation), try to keep them alive. In {@link #poll()}, we fetch and deserialize all - * existing segments each time, and them replace them in {@link #dataSourcesRef}. This method allows to use already + * existing segments each time, and then replace them in {@link #dataSourcesRef}. 
This method allows reusing already * existing (old) segments when possible, effectively interning them a-la {@link String#intern} or {@link - * com.google.common.collect.Interner}, aiming to make the most of {@link DataSegment} objects garbage soon after + * com.google.common.collect.Interner}, aiming to make the majority of {@link DataSegment} objects garbage soon after * they are deserialized and to die in young generation. It allows to avoid fragmentation of the old generation and * full GCs. */