diff --git a/examples/bin/dump-segment b/examples/bin/dump-segment
new file mode 100644
index 000000000000..04c0072fef8a
--- /dev/null
+++ b/examples/bin/dump-segment
@@ -0,0 +1,41 @@
+#!/bin/bash -eu
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Runs the Druid "dump-segment" tool with the configuration under conf/druid/tools.
+# All command-line arguments are forwarded unchanged to the tool.
+
+# Resolve to an absolute path so it stays valid after the "cd" below.
+WHEREAMI="$(cd "$(dirname "$0")" && pwd)"
+WHATAMI="dump-segment"
+CONFDIR="$WHEREAMI/../conf/druid/tools"
+MAIN_CLASS="org.apache.druid.cli.Main tools dump-segment"
+
+# NOTE: the working directory becomes the parent of this script's directory, so relative
+# paths given as tool arguments are resolved against that directory, not the caller's.
+cd "$WHEREAMI/.."
+
+CLASS_PATH="$CONFDIR/$WHATAMI:$CONFDIR/_common:$CONFDIR/../_common:$WHEREAMI/../lib/*"
+
+# jvm.config holds one JVM flag per line; xargs flattens it so that the unquoted
+# expansion below splits it into separate arguments. MAIN_CLASS is likewise left
+# unquoted on purpose: it carries the main class plus its two sub-command words.
+# shellcheck disable=SC2046,SC2086
+exec "$WHEREAMI"/run-java \
+  $(xargs < "$CONFDIR/$WHATAMI/jvm.config") \
+  -cp "$CLASS_PATH" $MAIN_CLASS "$@"
diff --git a/examples/conf/druid/tools/_common/common.runtime.properties b/examples/conf/druid/tools/_common/common.runtime.properties
new file mode 100644
index 000000000000..6b0665c2c93f
--- /dev/null
+++ b/examples/conf/druid/tools/_common/common.runtime.properties
@@ -0,0 +1,2 @@
+## common tool extensions
+druid.extensions.loadList=["druid-datasketches"]
diff --git a/examples/conf/druid/tools/_common/log4j2.xml b/examples/conf/druid/tools/_common/log4j2.xml
new file mode 100644
index 000000000000..756094c9ca12
--- /dev/null
+++ b/examples/conf/druid/tools/_common/log4j2.xml
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
diff --git a/examples/conf/druid/tools/dump-segment/jvm.config b/examples/conf/druid/tools/dump-segment/jvm.config
new file mode 100644
index 000000000000..cb2403c5f3b0
--- /dev/null
+++ b/examples/conf/druid/tools/dump-segment/jvm.config
@@ -0,0 +1,4 @@
+-server
+-XX:+ExitOnOutOfMemoryError
+-Duser.timezone=UTC
+-Dfile.encoding=UTF-8
diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
index fb8620df4fce..1986fb1ad9ac 100644
--- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java
+++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java
@@ -88,6 +88,7 @@
 import org.apache.druid.segment.data.FixedIndexed;
 import org.apache.druid.segment.data.Indexed;
 import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
+import org.apache.druid.segment.file.SegmentFileMapperV10;
 import org.apache.druid.segment.filter.Filters;
 import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
 import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn;
@@ -125,7 +126,8 @@ private enum
DumpType
     ROWS,
     METADATA,
     BITMAPS,
-    NESTED
+    NESTED,
+    METADATA_V10
   }
 
   public DumpSegment()
@@ -194,6 +196,11 @@ public void run()
       throw new IAE("Not a valid dump type: %s", dumpTypeString);
     }
 
+    if (dumpType == DumpType.METADATA_V10) {
+      dumpV10Metadata(injector, directory, outputFileName);
+      return;
+    }
+
     try (final QueryableIndex index = indexIO.loadIndex(new File(directory))) {
       switch (dumpType) {
         case ROWS:
@@ -690,6 +697,41 @@ public static void runDumpNestedColumnPath(
         outputFileName
     );
   }
+
+  /**
+   * Serializes the segment file metadata of a V10 segment as JSON.
+   *
+   * @param injector    used to obtain the {@link Json}-annotated {@link ObjectMapper}
+   * @param segmentFile path to the V10 segment file, or to a directory containing a file named
+   *                    {@link IndexIO#V10_FILE_NAME}
+   * @param output      forwarded to {@code withOutputStream} as the output file name
+   * @throws RuntimeException wrapping any {@link IOException} raised while mapping the file or writing the JSON
+   */
+  @VisibleForTesting
+  public static void dumpV10Metadata(Injector injector, String segmentFile, String output)
+  {
+    final ObjectMapper objectMapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
+    // run() hands over its directory argument as-is, so accept the segment directory as well as the file itself.
+    final File givenPath = new File(segmentFile);
+    final File v10File = givenPath.isDirectory() ? new File(givenPath, IndexIO.V10_FILE_NAME) : givenPath;
+    try (SegmentFileMapperV10 fileMapperV10 = SegmentFileMapperV10.create(v10File, objectMapper)) {
+      withOutputStream(
+          (Function<OutputStream, Object>) outStream -> {
+            try {
+              objectMapper.writeValue(outStream, fileMapperV10.getSegmentFileMetadata());
+            }
+            catch (IOException e) {
+              throw new RuntimeException(e);
+            }
+            return null;
+          },
+          output
+      );
+    }
+    catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
 
   @VisibleForTesting
   public static List getColumnsToInclude(final QueryableIndex index, List columns)
diff --git a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
index 47b0ae9534f1..186d109b1f53 100644
--- a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
+++ b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java
@@ -28,6 +28,7 @@
 import org.apache.druid.collections.bitmap.BitmapFactory;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
 import org.apache.druid.collections.bitmap.RoaringBitmapFactory;
+import org.apache.druid.data.input.ResourceInputSource;
 import org.apache.druid.data.input.impl.DimensionsSpec;
 import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.guice.BuiltInTypesModule;
@@ -51,6 +52,8 @@
import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.expression.TestExprMacroTable;
 import org.apache.druid.segment.DefaultColumnFormatConfig;
+import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.IndexIO;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.QueryableIndex;
 import org.apache.druid.segment.Segment;
@@ -59,6 +62,8 @@
 import org.apache.druid.segment.column.BaseColumnHolder;
 import org.apache.druid.segment.column.ColumnConfig;
 import org.apache.druid.segment.column.ColumnIndexSupplier;
+import org.apache.druid.segment.file.SegmentFileMetadata;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
 import org.apache.druid.testing.InitializedNullHandlingTest;
 import org.junit.After;
@@ -273,6 +278,64 @@ public void testGetModules()
     Assert.assertEquals(-1, (int) injector.getInstance(Key.get(Integer.class, Names.named("tlsServicePort"))));
   }
 
+  /**
+   * Builds a V10 segment, dumps its metadata to a temporary file and checks the container, column
+   * descriptor and file counts of the metadata read back from that file.
+   */
+  @Test
+  public void testDumpV10Metadata() throws IOException
+  {
+    final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
+    jsonMapper.registerModules(BuiltInTypesModule.getJacksonModulesList());
+    final InjectableValues.Std injectables = new InjectableValues.Std();
+    injectables.addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE);
+    injectables.addValue(ObjectMapper.class.getName(), jsonMapper);
+    injectables.addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null));
+    jsonMapper.setInjectableValues(injectables);
+
+    final Injector mockInjector = Mockito.mock(Injector.class);
+    Mockito.when(mockInjector.getInstance(Key.get(ObjectMapper.class, Json.class)))
+           .thenReturn(jsonMapper);
+    Mockito.when(mockInjector.getInstance(DefaultColumnFormatConfig.class))
+           .thenReturn(new DefaultColumnFormatConfig(null, null, null));
+
+    final File segmentDir = buildV10Segment();
+    final File dumpFile = tempFolder.newFile();
+    final String v10FilePath = segmentDir.getPath() + "/" + IndexIO.V10_FILE_NAME;
+    DumpSegment.dumpV10Metadata(mockInjector, v10FilePath, dumpFile.getPath());
+
+    final SegmentFileMetadata metadata = jsonMapper.readValue(
+        Files.readAllBytes(dumpFile.toPath()),
+        SegmentFileMetadata.class
+    );
+    Assert.assertNotNull(metadata);
+    Assert.assertEquals(1, metadata.getContainers().size());
+    Assert.assertEquals(2, metadata.getColumnDescriptors().size());
+    Assert.assertEquals(12, metadata.getFiles().size());
+  }
+
+  /**
+   * Uses {@link IndexBuilder} with {@code useV10()} to build an index from the
+   * {@code nested-test-data.json} resource, with schema discovery enabled and rollup disabled.
+   *
+   * @return the file returned by {@code buildMMappedIndexFile()}
+   */
+  private File buildV10Segment() throws IOException
+  {
+    final IncrementalIndexSchema schema =
+        IncrementalIndexSchema.builder()
+                              .withTimestampSpec(new TimestampSpec("timestamp", null, null))
+                              .withDimensionsSpec(DimensionsSpec.builder().useSchemaDiscovery(true).build())
+                              .withQueryGranularity(Granularities.NONE)
+                              .withRollup(false)
+                              .withMinTimestamp(0)
+                              .build();
+    return IndexBuilder.create()
+                       .useV10()
+                       .tmpDir(tempFolder.newFolder())
+                       .schema(schema)
+                       .inputSource(
+                           ResourceInputSource.of(getClass().getClassLoader(), "nested-test-data.json")
+                       )
+                       .inputFormat(TestIndex.DEFAULT_JSON_INPUT_FORMAT)
+                       .inputTmpDir(tempFolder.newFolder())
+                       .buildMMappedIndexFile();
+  }
+
   public static List createSegments(
       TemporaryFolder tempFolder,