From b63b50229b17640ca1668342e987e628c75eca1e Mon Sep 17 00:00:00 2001 From: miomiocat <284487410@qq.com> Date: Thu, 28 Apr 2022 19:10:44 +0800 Subject: [PATCH] [HUDI-3984] Remove mandatory check of partition path for cli command --- .../hudi/cli/commands/CompactionCommand.java | 2 +- .../cli/commands/FileSystemViewCommand.java | 2 +- .../commands/HDFSParquetImportCommand.java | 2 +- .../hudi/cli/commands/MetadataCommand.java | 10 +- .../commands/TestFileSystemViewCommand.java | 121 ++++++++++++++---- 5 files changed, 106 insertions(+), 31 deletions(-) diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java index 097c68a542c47..d3845137c8e23 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java @@ -558,7 +558,7 @@ public String unscheduleCompaction( @CliCommand(value = "compaction unscheduleFileId", help = "UnSchedule Compaction for a fileId") public String unscheduleCompactFile( @CliOption(key = "fileId", mandatory = true, help = "File Id") final String fileId, - @CliOption(key = "partitionPath", mandatory = true, help = "partition path") final String partitionPath, + @CliOption(key = "partitionPath", unspecifiedDefaultValue = "", help = "partition path") final String partitionPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory, @CliOption(key = {"skipValidation"}, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV, diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java index a506c8030a557..792128c0b8ae3 100644 ---
a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java @@ -119,7 +119,7 @@ public String showAllFileSlices( @CliCommand(value = "show fsview latest", help = "Show latest file-system view") public String showLatestFileSlices( - @CliOption(key = {"partitionPath"}, help = "A valid partition path", mandatory = true) String partition, + @CliOption(key = {"partitionPath"}, help = "A valid partition path", unspecifiedDefaultValue = "") String partition, @CliOption(key = {"baseFileOnly"}, help = "Only display base file view", unspecifiedDefaultValue = "false") boolean baseFileOnly, @CliOption(key = {"maxInstant"}, help = "File-Slices upto this instant are displayed", diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HDFSParquetImportCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HDFSParquetImportCommand.java index 93866cafcd321..5c6407cea1443 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HDFSParquetImportCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HDFSParquetImportCommand.java @@ -53,7 +53,7 @@ public String convert( @CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName, @CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType, @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField, - @CliOption(key = "partitionPathField", mandatory = true, + @CliOption(key = "partitionPathField", unspecifiedDefaultValue = "", help = "Partition path field name") final String partitionPathField, @CliOption(key = {"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert") final String parallelism, diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java index 
637f1393f51ad..e3d25e06b8860 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.metadata.HoodieBackedTableMetadata; @@ -225,7 +226,7 @@ public String listPartitions( @CliCommand(value = "metadata list-files", help = "Print a list of all files in a partition from the metadata") public String listFiles( - @CliOption(key = {"partition"}, help = "Name of the partition to list files", mandatory = true) final String partition) throws IOException { + @CliOption(key = {"partition"}, help = "Name of the partition to list files", unspecifiedDefaultValue = "") final String partition) throws IOException { HoodieCLI.getTableMetaClient(); HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build(); HoodieBackedTableMetadata metaReader = new HoodieBackedTableMetadata( @@ -235,8 +236,13 @@ public String listFiles( return "[ERROR] Metadata Table not enabled/initialized\n\n"; } + Path partitionPath = new Path(HoodieCLI.basePath); + if (!StringUtils.isNullOrEmpty(partition)) { + partitionPath = new Path(HoodieCLI.basePath, partition); + } + HoodieTimer timer = new HoodieTimer().startTimer(); - FileStatus[] statuses = metaReader.getAllFilesInPartition(new Path(HoodieCLI.basePath, partition)); + FileStatus[] statuses = metaReader.getAllFilesInPartition(partitionPath); LOG.debug("Took " + timer.endTimer() + " ms"); final List rows = new ArrayList<>(); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java 
b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java index d5c535ebfe00c..b6813a2146f8e 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java @@ -59,24 +59,73 @@ @Tag("functional") public class TestFileSystemViewCommand extends CLIFunctionalTestHarness { + private String nonpartitionedTablePath; + private String partitionedTablePath; private String partitionPath; - private SyncableFileSystemView fsView; + private SyncableFileSystemView nonpartitionedFsView; + private SyncableFileSystemView partitionedFsView; @BeforeEach public void init() throws IOException { + createNonpartitionedTable(); + createPartitionedTable(); + } + + private void createNonpartitionedTable() throws IOException { HoodieCLI.conf = hadoopConf(); // Create table and connect - String tableName = tableName(); - String tablePath = tablePath(tableName); + String nonpartitionedTableName = "nonpartitioned_" + tableName(); + nonpartitionedTablePath = tablePath(nonpartitionedTableName); new TableCommand().createTable( - tablePath, tableName, - "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload"); + nonpartitionedTablePath, nonpartitionedTableName, + "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload"); + + HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient(); + + Files.createDirectories(Paths.get(nonpartitionedTablePath)); + + // Generate 2 commits + String commitTime1 = "3"; + String commitTime2 = "4"; + + String fileId1 = UUID.randomUUID().toString(); + + // Write data files and log file + String testWriteToken = "2-0-2"; + Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils + .makeBaseFileName(commitTime1, testWriteToken, fileId1))); + Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils + .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0,
testWriteToken))); + Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils + .makeBaseFileName(commitTime2, testWriteToken, fileId1))); + Files.createFile(Paths.get(nonpartitionedTablePath, FSUtils + .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken))); + + // Write commit files + Files.createFile(Paths.get(nonpartitionedTablePath, ".hoodie", commitTime1 + ".commit")); + Files.createFile(Paths.get(nonpartitionedTablePath, ".hoodie", commitTime2 + ".commit")); + + // Reload meta client and create fsView + metaClient = HoodieTableMetaClient.reload(metaClient); + + nonpartitionedFsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline(), true); + } + + private void createPartitionedTable() throws IOException { + HoodieCLI.conf = hadoopConf(); + + // Create table and connect + String partitionedTableName = "partitioned_" + tableName(); + partitionedTablePath = tablePath(partitionedTableName); + new TableCommand().createTable( + partitionedTablePath, partitionedTableName, + "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload"); HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient(); partitionPath = HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH; - String fullPartitionPath = Paths.get(tablePath, partitionPath).toString(); + String fullPartitionPath = Paths.get(partitionedTablePath, partitionPath).toString(); Files.createDirectories(Paths.get(fullPartitionPath)); // Generate 2 commits @@ -97,13 +146,13 @@ public void init() throws IOException { .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken))); // Write commit files - Files.createFile(Paths.get(tablePath, ".hoodie", commitTime1 + ".commit")); - Files.createFile(Paths.get(tablePath, ".hoodie", commitTime2 + ".commit")); + Files.createFile(Paths.get(partitionedTablePath, ".hoodie", commitTime1 + ".commit")); + Files.createFile(Paths.get(partitionedTablePath, ".hoodie", 
commitTime2 + ".commit")); // Reload meta client and create fsView metaClient = HoodieTableMetaClient.reload(metaClient); - fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline(), true); + partitionedFsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline(), true); } /** @@ -116,7 +165,7 @@ public void testShowCommits() { assertTrue(cr.isSuccess()); // Get all file groups - Stream fileGroups = fsView.getAllFileGroups(partitionPath); + Stream fileGroups = partitionedFsView.getAllFileGroups(partitionPath); List rows = new ArrayList<>(); fileGroups.forEach(fg -> fg.getAllFileSlices().forEach(fs -> { @@ -164,7 +213,7 @@ public void testShowCommitsWithSpecifiedValues() { assertTrue(cr.isSuccess()); List rows = new ArrayList<>(); - Stream fileGroups = fsView.getAllFileGroups(partitionPath); + Stream fileGroups = partitionedFsView.getAllFileGroups(partitionPath); // Only get instant 1, since maxInstant was specified 2 fileGroups.forEach(fg -> fg.getAllFileSlices().filter(fs -> fs.getBaseInstantTime().equals("1")).forEach(fs -> { @@ -197,17 +246,7 @@ public void testShowCommitsWithSpecifiedValues() { assertEquals(expected, got); } - /** - * Test case for command 'show fsview latest'. - */ - @Test - public void testShowLatestFileSlices() { - // Test show with partition path '2016/03/15' - CommandResult cr = shell().executeCommand("show fsview latest --partitionPath " + partitionPath); - assertTrue(cr.isSuccess()); - - Stream fileSlice = fsView.getLatestFileSlices(partitionPath); - + private List fileSlicesToCRList(Stream fileSlice, String partitionPath) { List rows = new ArrayList<>(); fileSlice.forEach(fs -> { int idx = 0; @@ -245,7 +284,14 @@ public void testShowLatestFileSlices() { .collect(Collectors.toList()).toString(); rows.add(row); }); + return rows; + } + /** + * Test case for command 'show fsview latest'.
+ */ + @Test + public void testShowLatestFileSlices() throws IOException { Function converterFunction = entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString()))); Map> fieldNameToConverterMap = new HashMap<>(); @@ -267,9 +313,32 @@ public void testShowLatestFileSlices() { .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_BASE_UNSCHEDULED) .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_SCHEDULED) .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELTA_FILES_UNSCHEDULED); - String expected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, rows); - expected = removeNonWordAndStripSpace(expected); - String got = removeNonWordAndStripSpace(cr.getResult().toString()); - assertEquals(expected, got); + + // Test show with partition path '2016/03/15' + new TableCommand().connect(partitionedTablePath, null, false, 0, 0, 0); + CommandResult partitionedTableCR = shell().executeCommand("show fsview latest --partitionPath " + partitionPath); + assertTrue(partitionedTableCR.isSuccess()); + + Stream partitionedFileSlice = partitionedFsView.getLatestFileSlices(partitionPath); + + List partitionedRows = fileSlicesToCRList(partitionedFileSlice, partitionPath); + String partitionedExpected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, partitionedRows); + partitionedExpected = removeNonWordAndStripSpace(partitionedExpected); + String partitionedResults = removeNonWordAndStripSpace(partitionedTableCR.getResult().toString()); + assertEquals(partitionedExpected, partitionedResults); + + // Test show for non-partitioned table + new TableCommand().connect(nonpartitionedTablePath, null, false, 0, 0, 0); + CommandResult nonpartitionedTableCR = shell().executeCommand("show fsview latest"); + assertTrue(nonpartitionedTableCR.isSuccess()); + + Stream nonpartitionedFileSlice = nonpartitionedFsView.getLatestFileSlices(""); + + List nonpartitionedRows = 
fileSlicesToCRList(nonpartitionedFileSlice, ""); + + String nonpartitionedExpected = HoodiePrintHelper.print(header, fieldNameToConverterMap, "", false, -1, false, nonpartitionedRows); + nonpartitionedExpected = removeNonWordAndStripSpace(nonpartitionedExpected); + String nonpartitionedResults = removeNonWordAndStripSpace(nonpartitionedTableCR.getResult().toString()); + assertEquals(nonpartitionedExpected, nonpartitionedResults); } }