From 7ee3e25552d7e440cbb77c7bc7ea4a92ce697f5d Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Wed, 17 Dec 2025 10:31:13 +0000 Subject: [PATCH 01/18] fat(ENGKNOW-2781): Minor tweaks to the MDR error handling. --- .../stream/sources/mdr/MdrConfiguration.java | 13 +++++++++---- .../providers/stream/sources/mdr/MdrServer.java | 6 +++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrConfiguration.java b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrConfiguration.java index 03f61cf6..d4eb0b8e 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrConfiguration.java +++ b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrConfiguration.java @@ -26,7 +26,9 @@ import org.gorpipe.base.config.ConfigManager; import org.gorpipe.base.config.annotations.Documentation; import org.gorpipe.base.config.converters.DurationConverter; -import org.gorpipe.exceptions.GorParsingException; +import org.gorpipe.util.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.nio.file.Files; import java.nio.file.Path; @@ -37,6 +39,8 @@ public interface MdrConfiguration extends Config { + Logger log = LoggerFactory.getLogger(MdrConfiguration.class); + /** * Parse MDR credentials from a string. * @@ -60,7 +64,8 @@ static List parseConfigurationData(String credentialsData) { String[] parts = credLine.split("\t"); if (parts.length != 5) { - throw new IllegalArgumentException("Invalid credential line format. Expected format: \\t\\t\\t"); + log.error("Invalid credential line format. 
Expected format: \\t\\t\\t"); + continue; } mdrConfList.add(ConfigManager.createConfig(MdrConfiguration.class, Map.of( @@ -81,14 +86,14 @@ static HashMap loadMdrConfigurations(MdrConfiguration final String MDR_CREDENTIALS_PATH = System.getProperty("gor.mdr.credentials"); - if (MDR_CREDENTIALS_PATH != null && !MDR_CREDENTIALS_PATH.isEmpty()) { + if (!Strings.isNullOrEmpty(MDR_CREDENTIALS_PATH)) { try { String credentialsData = Files.readString(Path.of(MDR_CREDENTIALS_PATH)); for (MdrConfiguration config : parseConfigurationData(credentialsData)) { mdrConfigurationsMap.put(config.mdrServerName(), config); } } catch (Exception e) { - throw new GorParsingException("Failed to read MDR credentials from path: " + MDR_CREDENTIALS_PATH, e); + log.error("Failed to read MDR credentials from path: " + MDR_CREDENTIALS_PATH, e); } } return mdrConfigurationsMap; diff --git a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrServer.java b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrServer.java index 307c6ee3..b5da032f 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrServer.java +++ b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/mdr/MdrServer.java @@ -49,7 +49,11 @@ public static void loadMdrServers(MdrConfiguration defaultConfig) { public static String resolveUrl(String url) { URI uri = URI.create(url); - return mdrServers.get(extractMdrEnvName(uri)).resolveMdrUrl(uri); + MdrServer server = mdrServers.get(extractMdrEnvName(uri)); + if (server == null) { + throw new GorResourceException("Can not resolve MDR url %s, config for env %s not found.".formatted(url, extractMdrEnvName(uri)), url); + } + return server.resolveMdrUrl(uri); } public static void cacheUrls(List sources) { From 40e62eb5c32070dc4ee12b5631c392cd80cbd2b4 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Thu, 18 Dec 2025 00:32:37 +0000 Subject: [PATCH 02/18] fat(ENGKNOW-2781): Fix minor issue with 
Exec. --- gortools/src/main/scala/gorsat/InputSources/Exec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gortools/src/main/scala/gorsat/InputSources/Exec.scala b/gortools/src/main/scala/gorsat/InputSources/Exec.scala index 8db82e16..2d791b33 100644 --- a/gortools/src/main/scala/gorsat/InputSources/Exec.scala +++ b/gortools/src/main/scala/gorsat/InputSources/Exec.scala @@ -36,7 +36,7 @@ import scala.collection.mutable.ListBuffer /** * Execute selected gor commands in NOR context. */ -class Exec() extends InputSourceInfo("EXEC", CommandArguments("","", 2, 10, ignoreIllegalArguments=true), isNorCommand = true) { +class Exec() extends InputSourceInfo("EXEC", CommandArguments("","", 2, 100, ignoreIllegalArguments=true), isNorCommand = true) { override def processArguments(context: GorContext, argString: String, iargs: Array[String], args: Array[String]): InputSourceParsingResult = { From 95bb59ea772f4565a7db52e8da95cd5dad3c9e6f Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Thu, 18 Dec 2025 16:23:33 +0000 Subject: [PATCH 03/18] fat(ENGKNOW-2781): Changes to make -link and pgor/parallel work correctly. 
--- .../Script/BaseScriptExecutionEngine.java | 16 +- .../java/gorsat/process/GorJavaUtilities.java | 10 +- .../scala/gorsat/Analysis/ForkWrite.scala | 163 ++++++----------- .../main/scala/gorsat/Commands/Write.scala | 32 +++- .../src/main/scala/gorsat/Macros/PGor.scala | 12 ++ .../main/scala/gorsat/Macros/Parallel.scala | 20 ++- .../QueryHandlers/GeneralQueryHandler.scala | 8 +- .../gorsat/Utilities/MacroUtilities.scala | 39 ++--- .../src/test/java/gorsat/UTestGorWrite.java | 102 ++++++++++- .../test/java/gorsat/UTestNorDictFile.java | 1 + .../gorpipe/gor/driver/GorDriverConfig.java | 6 +- .../gorpipe/gor/driver/linkfile/LinkFile.java | 85 +-------- .../gor/driver/linkfile/LinkFileMeta.java | 7 +- .../gor/driver/linkfile/LinkFileUtil.java | 164 ++++++++++++++++++ .../gor/driver/linkfile/LinkFileTest.java | 46 +++-- 15 files changed, 450 insertions(+), 261 deletions(-) create mode 100644 model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java diff --git a/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java b/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java index 8ae1b656..68c1ed3e 100644 --- a/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java +++ b/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java @@ -66,22 +66,20 @@ private Optional> resolveForkPathParent(String res) { private Optional> resolveCache(GorContext context, String lastCommand, ExecutionBlock queryBlock) { var write = new Write(); - var split = CommandParseUtilities.quoteSafeSplit(lastCommand.substring(6).trim(), ' '); - var args = write.validateArguments(split); - String lastField; - if (args.length == 0) { + var args = lastCommand.substring("write ".length()).split(" "); + var options = write.parseBaseOptions(context, write.validateArguments(args), args, false); + var outFile = options._1(); + if (Strings.isNullOrEmpty(outFile)) { if (queryBlock.signature() != null) { var writeFilePath = 
context.getSession().getProjectContext().getFileCache().tempLocation(queryBlock.signature(), DataType.GORD.suffix); writeFilePath = PathUtils.relativize(context.getSession().getProjectContext().getProjectRoot(), writeFilePath); queryBlock.query_$eq(queryBlock.query() + " " + writeFilePath); - lastField = writeFilePath; + outFile = writeFilePath; } else { - lastField = null; + outFile = null; } - } else { - lastField = args[0].trim(); } - return !Strings.isNullOrEmpty(lastField) && !lastField.startsWith("-") ? resolveForkPathParent(lastField) : Optional.empty(); + return !Strings.isNullOrEmpty(outFile) ? resolveForkPathParent(outFile) : Optional.empty(); } public Optional> getExplicitWrite(GorContext context, ExecutionBlock queryBlock) { diff --git a/gortools/src/main/java/gorsat/process/GorJavaUtilities.java b/gortools/src/main/java/gorsat/process/GorJavaUtilities.java index 906aa02d..4f0b392a 100644 --- a/gortools/src/main/java/gorsat/process/GorJavaUtilities.java +++ b/gortools/src/main/java/gorsat/process/GorJavaUtilities.java @@ -24,6 +24,7 @@ import org.apache.commons.io.FilenameUtils; import org.gorpipe.exceptions.GorSystemException; +import org.gorpipe.gor.driver.linkfile.LinkFileUtil; import org.gorpipe.gor.driver.meta.DataType; import org.gorpipe.gor.model.*; import org.gorpipe.gor.model.FileReader; @@ -447,7 +448,7 @@ public static String verifyLinkFileLastModified(ProjectContext projectContext, S return cacheFile; } - public static void writeDictionaryFromMeta(FileReader fileReader, String outfolderpath, String dictionarypath) throws IOException { + public static void writeDictionaryFromMeta(String commandToExecute, FileReader fileReader, String outfolderpath, String dictionarypath) throws IOException { FileReader localFileReader = fileReader; fileReader.updateFileSystemMetaData(outfolderpath); @@ -487,6 +488,13 @@ public static void writeDictionaryFromMeta(FileReader fileReader, String outfold } localFileReader.writeLinkIfNeeded(dictionarypath); + + var 
linkOptions = LinkFileUtil.extractLinkOptionData(commandToExecute); + if (!Strings.isNullOrEmpty(linkOptions)) { + var linkMetaOption = LinkFileUtil.extractLinkMetaOptionData(commandToExecute); + var linkData = LinkFileUtil.extractLink(fileReader, outfolderpath, linkOptions, linkMetaOption, null); + LinkFileUtil.writeLinkFile(fileReader, linkData); + } } public static Optional parseDictionaryColumn(String[] dictList, FileReader fileReader) { diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala index 61885b76..d68e6dae 100644 --- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala +++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala @@ -23,21 +23,18 @@ package gorsat.Analysis import java.util.zip.Deflater -import gorsat.Commands.{Analysis, CommandParseUtilities, Output, RowHeader} +import gorsat.Commands.{Analysis, Output, RowHeader} import gorsat.Outputs.OutFile import org.apache.commons.io.FilenameUtils -import org.apache.commons.lang3.StringUtils import org.gorpipe.exceptions.GorResourceException import org.gorpipe.gor.binsearch.GorIndexType -import org.gorpipe.gor.driver.linkfile.{LinkFile, LinkFileEntryV1} -import org.gorpipe.gor.driver.meta.{DataType, SourceReference} -import org.gorpipe.gor.driver.providers.stream.sources.StreamSource -import org.gorpipe.gor.model.{DriverBackedFileReader, Row} +import org.gorpipe.gor.driver.linkfile.LinkFileUtil +import org.gorpipe.gor.driver.meta.DataType +import org.gorpipe.gor.model.Row import org.gorpipe.gor.session.{GorSession, ProjectContext} import org.gorpipe.gor.table.util.PathUtils import org.gorpipe.gor.util.DataUtil import org.gorpipe.model.gor.RowObj -import org.gorpipe.util.Strings import java.util.UUID import scala.collection.mutable @@ -129,48 +126,10 @@ case class ForkWrite(forkCol: Int, if (forkCol >= 0 && options.useFolder.isEmpty && !(fullFileName.contains("#{fork}") || fullFileName.contains("""${fork}"""))) { throw 
new GorResourceException("WRITE error: #{fork} of ${fork}missing from filename.", fullFileName) } - var fileName : String = _ - val projectContext = session.getProjectContext - if(options.useFolder.nonEmpty) { - val folder = options.useFolder.get - ensureDir(projectContext, folder) - val fn = if (fullFileName.isEmpty) { - val uuid = UUID.randomUUID().toString - val folderEnding = FilenameUtils.getExtension(folder) - val ending = if (folderEnding.nonEmpty) "." + folderEnding else (if (options.nor) DataType.NOR.suffix else DataType.GORZ.suffix) - s"$uuid${if(DataUtil.isGord(folder)) DataType.GORZ.suffix else ending}" - } else { - fullFileName - } - - val dir = if(folder.endsWith("/")) folder else folder + "/" - if (forkCol >= 0) { - val cols = inHeader.split("\t") - val fork = cols(forkCol) + "=" + forkValue - val forkdir = dir + fork - ensureDir(projectContext, forkdir) - fileName = forkdir + "/" + fn - } else { - fileName = dir + fn - } - } else { - fileName = if (forkCol >= 0) { - fullFileName.replace("#{fork}", forkValue).replace("""${fork}""", forkValue) - } else { - if (fullFileName.isEmpty && options.linkFile.nonEmpty) { - val (linkFileMeta, linkFileInfo) = extractLinkMetaInfo(options.linkFileMeta) - val linkSourceRef = new SourceReference(options.linkFile, null, projectContext.getFileReader.getCommonRoot, null, null, true); - // Infer the full file name from the link (and defautl locations) - LinkFile.inferDataFileNameFromLinkFile( - projectContext.getFileReader.resolveDataSource(linkSourceRef).asInstanceOf[StreamSource], linkFileMeta); - } else { - fullFileName - } - } - - ensureDir(projectContext, fileName, parent = true) - } + val projectContext = session.getProjectContext + var fileName : String = inferFileName(fullFileName, forkValue) + ensureDir(projectContext, fileName, parent = true) var fileOpen = false var headerWritten = false @@ -225,6 +184,40 @@ case class ForkWrite(forkCol: Int, override def isTypeInformationMaintained: Boolean = true + def 
inferFileName(inFileName: String, forkValue: String): String = { + var inferredFileName: String = "" + if(options.useFolder.nonEmpty) { + val folder = options.useFolder.get + val fn = if (inFileName.isEmpty || DataUtil.isGord(folder)) { + val uuid = UUID.randomUUID().toString + val folderEnding = FilenameUtils.getExtension(folder) + val ending = if (folderEnding.nonEmpty) "." + folderEnding else (if (options.nor) DataType.NOR.suffix else DataType.GORZ.suffix) + s"$uuid${if(DataUtil.isGord(folder)) DataType.GORZ.suffix else ending}" + } else { + inFileName + } + + val dir = if(folder.endsWith("/")) folder else folder + "/" + + if (forkCol >= 0) { + val cols = inHeader.split("\t") + val fork = cols(forkCol) + "=" + forkValue + val forkdir = dir + fork + inferredFileName = forkdir + "/" + fn + } else { + inferredFileName = dir + fn + } + } else { + inferredFileName = if (forkCol >= 0) { + inFileName.replace("#{fork}", forkValue).replace("""${fork}""", forkValue) + } else { + inFileName + } + } + + inferredFileName + } + /** * Creates OutFile with given name * if the path is a directory save a file with generated md5 sum as name under directory @@ -343,19 +336,22 @@ case class ForkWrite(forkCol: Int, }) } - if (useFork) { - forkMap.values.foreach(sh => { - val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(sh.fileName) + if (options.linkFile.nonEmpty) { + if (useFork) { + forkMap.values.foreach(sh => { + val linkData = LinkFileUtil.extractLink(session.getProjectContext.getFileReader, sh.fileName, + inferFileName(options.linkFile, sh.forkValue), options.linkFileMeta, getMd5) - if (linkFile.nonEmpty) { - writeLinkFile(linkFile, linkFileUrl, linkFileMeta, linkFileInfo) - } - }) - } else { - val (linkFile, linkFileUrl, linkFileMeta, linkFileInfo) = extractLink(singleFileHolder.fileName, options.linkFile, options.linkFileMeta) + if (linkData.linkFile().nonEmpty) { + LinkFileUtil.writeLinkFile(session.getProjectContext.getFileReader, linkData) + } + }) + } 
else if (options.useFolder.isEmpty && !singleFileHolder.fileName.contains(".gord/")) { + val linkData = LinkFileUtil.extractLink(session.getProjectContext.getFileReader, singleFileHolder.fileName, options.linkFile, options.linkFileMeta, getMd5) - if (linkFile.nonEmpty) { - writeLinkFile(linkFile, linkFileUrl, linkFileMeta, getMd5, linkFileInfo) + if (linkData.linkFile().nonEmpty) { + LinkFileUtil.writeLinkFile(session.getProjectContext.getFileReader, linkData) + } } } } @@ -368,53 +364,4 @@ case class ForkWrite(forkCol: Int, "" } } - - private def extractLink(source: String, optLinkFile: String = "", optLinkFileMeta: String = "") : (String, String, String, String) = { - var linkFile = LinkFile.validateAndUpdateLinkFileName(optLinkFile) - var linkFileContent = if (linkFile.nonEmpty) PathUtils.resolve(session.getProjectContext.getProjectRoot, source) else "" - - if (linkFile.isEmpty && source.nonEmpty) { - // Check if link file is forced from the source - val dataSource = session.getProjectContext.getFileReader.resolveUrl(source, true) - if (dataSource != null && dataSource.forceLink()) { - linkFile = dataSource.getProjectLinkFile - linkFileContent = dataSource.getProjectLinkFileContent - } - } - val (linkFileMeta, linkFileInfo) = extractLinkMetaInfo(optLinkFileMeta) - (linkFile, linkFileContent, linkFileMeta, linkFileInfo) - } - - private def extractLinkMetaInfo(optLinkFileMeta: String) : (String, String) = { - var linkFileMeta = "" - var linkFileInfo = "" - - if (!Strings.isNullOrEmpty(optLinkFileMeta)) { - for (s <- CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',')) { - val l = s.trim - if (l.startsWith(LinkFileEntryV1.ENTRY_INFO_KEY)) { - linkFileInfo = StringUtils.strip(l.substring(LinkFileEntryV1.ENTRY_INFO_KEY.length + 1), "\"\'") - } else { - linkFileMeta += "## " + l + "\n" - } - } - } - - (linkFileMeta, linkFileInfo) - } - - private def writeLinkFile(linkFilePath: String, linkFileContent: String, - linkFileMeta: String = 
"", md5: String = null, linkFileInfo: String = null) : Unit = { - // Validate that we can write to the location (skip link extension as writing links is always forbidden). - session.getProjectContext.getFileReader.resolveUrl(FilenameUtils.removeExtension(linkFilePath), true) - - // Use the nonsecure driver file reader as this is an exception from the write no links rule. - val fileReader = new DriverBackedFileReader(session.getProjectContext.getFileReader.getSecurityContext, - session.getProjectContext.getProjectRoot, session.getProjectContext.getFileReader.getQueryTime) - - LinkFile.load(fileReader.resolveUrl(linkFilePath, true).asInstanceOf[StreamSource]) - .appendMeta(linkFileMeta) - .appendEntry(linkFileContent, md5, linkFileInfo, fileReader) - .save(session.getProjectContext.getFileReader.getQueryTime) - } } diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index bf4920ee..738feca2 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -28,7 +28,9 @@ import gorsat.Commands.CommandParseUtilities._ import org.apache.commons.io.FilenameUtils import org.gorpipe.exceptions.{GorParsingException, GorResourceException} import org.gorpipe.gor.binsearch.GorIndexType -import org.gorpipe.gor.driver.meta.DataType +import org.gorpipe.gor.driver.linkfile.LinkFileUtil +import org.gorpipe.gor.driver.meta.{DataType, SourceReference} +import org.gorpipe.gor.driver.providers.stream.sources.StreamSource import org.gorpipe.gor.session.GorContext import org.gorpipe.gor.util.DataUtil @@ -36,19 +38,41 @@ import org.gorpipe.gor.util.DataUtil class Write extends CommandInfo("WRITE", CommandArguments("-r -c -m -inferschema -maxseg -noheader", "-d -f -i -t -l -tags -card -prefix -link -linkmeta", 0), CommandOptions(gorCommand = true, norCommand = true, verifyCommand = true)) { - override def processArguments(context: GorContext, argString: String, 
iargs: Array[String], args: Array[String], executeNor: Boolean, forcedInputHeader: String): CommandParsingResult = { + def parseBaseOptions(context: GorContext, iargs: Array[String], args: Array[String], executeNor: Boolean): (String, Option[String], Boolean) = { var fileName = replaceSingleQuotes(iargs.mkString(" ")) + + val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else "" + val linkMetaOpt = if (hasOption(args, "-linkmeta")) stringValueOfOption(args, "-linkmeta") else "" + + fileName = if (fileName.isEmpty && linkOpt.nonEmpty) { + val linkMetaInfo = LinkFileUtil.extractLinkMetaInfo(linkMetaOpt) + val linkSourceRef = new SourceReference(linkOpt, null, context.getSession.getProjectContext.getFileReader.getCommonRoot, null, null, true); + // Infer the full file name from the link (and defautl locations) + LinkFileUtil.inferDataFileNameFromLinkFile( + context.getSession.getProjectContext.getFileReader.resolveDataSource(linkSourceRef).asInstanceOf[StreamSource], linkMetaInfo.linkFileMeta); + } else { + fileName + } + val useFolder = if (hasOption(args, "-d")) { Option.apply(stringValueOfOption(args, "-d")) } else if(!executeNor && DataUtil.isGord(fileName)) { val fn = fileName - fileName = "" Option.apply(fn) } else { Option.empty } - + + val hasFork = hasOption(args, "-f") + + (fileName, useFolder, hasFork) + } + + override def processArguments(context: GorContext, argString: String, iargs: Array[String], args: Array[String], executeNor: Boolean, forcedInputHeader: String): CommandParsingResult = { + + val (fileName, useFolder, _) = parseBaseOptions(context, iargs, args, executeNor) + var forkCol = -1 var remove = false var columnCompress: Boolean = false diff --git a/gortools/src/main/scala/gorsat/Macros/PGor.scala b/gortools/src/main/scala/gorsat/Macros/PGor.scala index 2beb22d8..be9e8dd1 100644 --- a/gortools/src/main/scala/gorsat/Macros/PGor.scala +++ b/gortools/src/main/scala/gorsat/Macros/PGor.scala @@ -26,8 +26,10 @@ import 
gorsat.Commands.{CommandArguments, CommandParseUtilities} import gorsat.Script import gorsat.Script._ import gorsat.Utilities.MacroUtilities.getCachePath +import org.gorpipe.gor.driver.linkfile.LinkFileUtil import org.gorpipe.gor.session.GorContext import org.gorpipe.gor.table.util.PathUtils +import org.gorpipe.util.Strings import java.util /*** @@ -114,6 +116,16 @@ class PGor extends MacroInfo("PGOR", CommandArguments("-nowithin", "-gordfolder" gordict += " -nodict" } + val linkOptions = LinkFileUtil.extractLinkOptionData(queryAppend) + if (!Strings.isNullOrEmpty(linkOptions)) { + gordict += " -link " + linkOptions + } + + val linkMetaOptions = LinkFileUtil.extractLinkMetaOptionData(queryAppend) + if (!Strings.isNullOrEmpty(linkMetaOptions)) { + gordict += " -linkMeta " + linkMetaOptions + } + val cmd = splitManager.chromosomeSplits.keys.foldLeft(gordict)((x, y) => x + " [" + theKey + "_" + y + "] " + splitManager.chromosomeSplits(y).getRange) (cmd, theDependencies, partitionedGorCommands) diff --git a/gortools/src/main/scala/gorsat/Macros/Parallel.scala b/gortools/src/main/scala/gorsat/Macros/Parallel.scala index af9a2059..383226cd 100644 --- a/gortools/src/main/scala/gorsat/Macros/Parallel.scala +++ b/gortools/src/main/scala/gorsat/Macros/Parallel.scala @@ -27,8 +27,10 @@ import gorsat.Script.{ExecutionBlock, MacroInfo, MacroParsingResult, ScriptParse import gorsat.Utilities.MacroUtilities import gorsat.process.{GorInputSources, GorJavaUtilities, GorPipeMacros, SourceProvider} import org.gorpipe.exceptions.GorParsingException +import org.gorpipe.gor.driver.linkfile.LinkFileUtil import org.gorpipe.gor.session.GorContext import org.gorpipe.gor.table.util.PathUtils +import org.gorpipe.util.Strings import java.util @@ -65,7 +67,7 @@ class Parallel extends MacroInfo("PARALLEL", CommandArguments("-gordfolder", "-p val inputSource1 = SourceProvider(CommandParseUtilities.stringValueOfOption(options, "-parts"), context, executeNor = true, isNor = false) val partsSource = 
inputSource1.source val header = inputSource1.header - val extraCommands: String = MacroUtilities.getExtraStepsFromQuery(create.query).trim + var extraCommands: String = MacroUtilities.getExtraStepsFromQuery(create.query).trim val parGorCommands = new util.LinkedHashMap[String, ExecutionBlock]() val theKey = createKey.slice(1, createKey.length - 1) var theDependencies: List[String] = Nil @@ -103,6 +105,10 @@ class Parallel extends MacroInfo("PARALLEL", CommandArguments("-gordfolder", "-p val srcmd = newCommand.substring(0,i) if (GorJavaUtilities.isPGorCmd(srcmd)) newCommand = srcmd+"-gordfolder nodict "+newCommand.substring(i) } + + if (hasDictFolderWrite && create.cachePath != null && !extraCommands.contains(create.cachePath)) { + extraCommands = extraCommands + " " + create.cachePath + } if (extraCommands.nonEmpty) newCommand += " " + extraCommands parGorCommands.put(parKey, ExecutionBlock(create.groupName, newCommand, create.signature, create.dependencies, create.batchGroupName, cachePath, hasForkWrite = hasForkWrite)) @@ -118,7 +124,17 @@ class Parallel extends MacroInfo("PARALLEL", CommandArguments("-gordfolder", "-p partsSource.close() } - val theCommand = Range(1,parGorCommands.size+1).foldLeft(getDictionaryType(cmdToModify,useGordFolders)) ((x, y) => x + " [" + theKey + "_" + y + "] " + y) + var theCommand = Range(1,parGorCommands.size+1).foldLeft(getDictionaryType(cmdToModify,useGordFolders)) ((x, y) => x + " [" + theKey + "_" + y + "] " + y) + + val linkOptions = LinkFileUtil.extractLinkOptionData(create.query) + if (!Strings.isNullOrEmpty(linkOptions)) { + theCommand += " -link " + linkOptions + } + + val linkMetaOptions = LinkFileUtil.extractLinkMetaOptionData(create.query) + if (!Strings.isNullOrEmpty(linkMetaOptions)) { + theCommand += " -linkMeta " + linkMetaOptions + } parGorCommands.put(createKey, ExecutionBlock(create.groupName, theCommand, create.signature, theDependencies.toArray, create.batchGroupName, cachePath, isDictionary = true, 
hasForkWrite = hasForkWrite)) MacroParsingResult(parGorCommands, null) diff --git a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala index 88b6d715..cc4d4f7b 100644 --- a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala +++ b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala @@ -310,7 +310,7 @@ object GeneralQueryHandler { } } - private def writeOutGorDictionaryFolder(fileReader: FileReader, outfolderpath: String, useTheDict: Boolean): Unit = { + private def writeOutGorDictionaryFolder(commandToExecute: String, fileReader: FileReader, outfolderpath: String, useTheDict: Boolean): Unit = { val outpath = if(useTheDict) { if (outfolderpath.endsWith("/")) s"$outfolderpath${GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME}" else s"$outfolderpath/${GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME}"; } else { @@ -324,7 +324,7 @@ object GeneralQueryHandler { s"$outfolderpath/${outfolderpath.substring(idx+1)}" } } - GorJavaUtilities.writeDictionaryFromMeta(fileReader, outfolderpath, outpath) + GorJavaUtilities.writeDictionaryFromMeta(commandToExecute, fileReader, outfolderpath, outpath) } def dictRangeFromSeekRange(inp: String, prefix: String): String = { @@ -382,7 +382,7 @@ object GeneralQueryHandler { private def writeOutGorDictionary(commandToExecute: String, fileReader: FileReader, outfile: String, useTheDict: Boolean): String = { if(fileReader.isDirectory(outfile)) { - if (!commandToExecute.toLowerCase.contains("-nodict")) writeOutGorDictionaryFolder(fileReader, outfile, useTheDict) + if (!commandToExecute.toLowerCase.contains("-nodict")) writeOutGorDictionaryFolder(commandToExecute, fileReader, outfile, useTheDict) } else { val w = commandToExecute.split(' ') var dictFiles: List[String] = Nil @@ -441,7 +441,7 @@ object GeneralQueryHandler { private def writeOutGorDictionaryPart(commandToExecute: String, fileReader: FileReader, outfile: String, 
useTheDict: Boolean): String = { if(fileReader.isDirectory(outfile)) { - if (!commandToExecute.toLowerCase.contains("-nodict")) writeOutGorDictionaryFolder(fileReader, outfile, useTheDict) + if (!commandToExecute.toLowerCase.contains("-nodict")) writeOutGorDictionaryFolder(commandToExecute, fileReader, outfile, useTheDict) } else { val w = commandToExecute.split(' ') var dictFiles: List[String] = Nil diff --git a/gortools/src/main/scala/gorsat/Utilities/MacroUtilities.scala b/gortools/src/main/scala/gorsat/Utilities/MacroUtilities.scala index fc840854..66200810 100644 --- a/gortools/src/main/scala/gorsat/Utilities/MacroUtilities.scala +++ b/gortools/src/main/scala/gorsat/Utilities/MacroUtilities.scala @@ -36,6 +36,7 @@ import org.gorpipe.gor.model.FileReader import org.gorpipe.gor.session.GorContext import org.gorpipe.gor.table.util.PathUtils import org.gorpipe.gor.util.DataUtil +import org.gorpipe.util.Strings import org.slf4j.{Logger, LoggerFactory} import java.nio.file.{Files, Paths} @@ -464,32 +465,28 @@ object MacroUtilities { val inested = nested.substring(2,nested.length-1) querySplit = CommandParseUtilities.quoteSafeSplit(inested,'|') } - val lastCmd = querySplit.last.trim - val lastCmdLower = lastCmd.toLowerCase + var lastCmd = querySplit.last.trim val hasWrite = isCommandWrite(lastCmd) - val didx = if(hasWrite) lastCmd.indexOf(" -d ") else 0 - val lidx = if(hasWrite) { - if (DataUtil.isGord(lastCmdLower)) lastCmdLower.length-5 - else lastCmdLower.indexOf(DataType.GORD.suffix + "/") - } else 0 - val hasForkWrite = isCommandForkWrite(lastCmd) - val hasGordFolderWrite = didx > 0 || lidx > 0 - val writeDir = if (didx>0) { - var k = didx+4 - while (lastCmd.charAt(k)==' ') k += 1 - val e = lastCmd.indexOf(' ',k) - if (e == -1) lastCmd.substring(k).trim else lastCmd.substring(k,e).trim - } else if (lidx>0) { - val k = lastCmd.lastIndexOf(' ',lidx)+1 - lastCmd.substring(k,lidx+5) - } else null - val hasWriteFile = hasWrite & DataUtil.isGord(lastCmdLower) + + if 
(hasWrite && create.cachePath != null && !lastCmd.contains(create.cachePath)) { + lastCmd = lastCmd + " " + create.cachePath + } + val (writeFile, useFolder, hasForkWrite) = if (hasWrite) { + val write = new Write + val args = lastCmd.substring("write ".length).split(" ") + write.parseBaseOptions(context, write.validateArguments(args), args, false) + } else { + ("", Option.empty, false) + } + val writeDir = if (DataUtil.isGord(writeFile)) writeFile else PathUtils.stripTrailingSlash(PathUtils.getParent(writeFile)) + val hasGordFolderWrite = useFolder.nonEmpty || DataUtil.isGord(writeDir) + val finalQuery = if(hasWrite) querySplit.slice(0,querySplit.length-1).mkString("|") else innerQuery if(skipcache) { val queryAppend = appendQuery(finalQuery, lastCmd, false) (hasGordFolderWrite, false, hasForkWrite, null, queryAppend) - } else if(writeDir != null || hasWriteFile) { - val cacheRes = if(writeDir!=null) writeDir else lastCmd.split(" ").last + } else if(!Strings.isNullOrEmpty(writeDir)) { + val cacheRes = if (!Strings.isNullOrEmpty(writeDir)) writeDir else lastCmd.split(" ").last val cachepath = Paths.get(cacheRes) val cacheFileExists = Files.exists(cachepath) && !Files.isDirectory(cachepath) val queryAppend = " <(" + finalQuery + ")" + " | " + lastCmd diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index 24ece8b6..f0be3484 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -23,7 +23,6 @@ package gorsat; import org.apache.commons.io.FileUtils; -import org.apache.commons.io.file.PathUtils; import org.gorpipe.exceptions.GorParsingException; import org.gorpipe.exceptions.GorSecurityException; import org.gorpipe.exceptions.GorSystemException; @@ -34,6 +33,7 @@ import org.gorpipe.gor.driver.meta.DataType; import org.gorpipe.gor.driver.providers.stream.sources.file.FileSource; import org.gorpipe.gor.model.BaseMeta; +import 
org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.gor.util.DataUtil; import org.junit.*; import org.junit.contrib.java.lang.system.EnvironmentVariables; @@ -238,7 +238,7 @@ public void testWriteLinkFileAndMetaWithInfo() throws IOException { @Test public void testWriteLinkFileWithInferFileName() throws IOException { - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, workDirPath.resolve("managed_data").toString()); + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, workDirPath.resolve("managed_data").toString()); TestUtils.runGorPipe("gorrow chr1,1,100 | write -link ltest.gor", "-gorroot", workDirPath.toString()); var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("ltest.gor.link").toString())); @@ -255,7 +255,7 @@ public void testWriteLinkFileWithInferFileName() throws IOException { @Test public void testWriteLinkFileWithInferFileNameForExistingLink() throws IOException { - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, workDirPath.resolve("managed_data").toString()); + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, workDirPath.resolve("managed_data").toString()); TestUtils.runGorPipe("gorrow chr1,1,100 | write -link ltest.gor", "-gorroot", workDirPath.toString()); TestUtils.runGorPipe("gorrow chr1,1,101 | write -link ltest.gor", "-gorroot", workDirPath.toString()); @@ -270,6 +270,102 @@ public void testWriteLinkFileWithInferFileNameForExistingLink() throws IOExcepti } + @Test + public void testWriteLinkFileForGordFolder() throws IOException { + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + TestUtils.runGorPipe("pgor dbsnp.gor | write -link dbsnp3.gord dbsnp2.gord ", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + + Assert.assertEquals(1, 
linkFile.getEntriesCount()); + Assert.assertEquals(workDirPath.resolve("dbsnp2.gord"), Path.of(linkFile.getLatestEntry().url())); + + String linkresult1 = TestUtils.runGorPipe("gor dbsnp.gor| top 1000", "-gorroot", workDirPath.toString()); + String linkresult3 = TestUtils.runGorPipe("gor dbsnp3.gord | top 1000", "-gorroot", workDirPath.toString()); + Assert.assertEquals(linkresult1, linkresult3); + } + + @Test + public void testWriteLinkFileForGordFolderParallel() throws IOException { + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + TestUtils.runGorPipe("parallel -parts <(nor dbsnp.gor | select chrom | distinct) <(gor -p #{col:Chrom} dbsnp.gor) | write -link dbsnp3.gord dbsnp2.gord ", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertEquals(workDirPath.resolve("dbsnp2.gord"), Path.of(linkFile.getLatestEntry().url())); + + String linkresult1 = TestUtils.runGorPipe("gor dbsnp.gor| top 1000", "-gorroot", workDirPath.toString()); + String linkresult3 = TestUtils.runGorPipe("gor dbsnp3.gord | top 1000", "-gorroot", workDirPath.toString()); + Assert.assertEquals(linkresult1, linkresult3); + } + + @Test + public void testWriteLinkFileForGordFolderInferFilename() throws IOException { + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + TestUtils.runGorPipe("pgor dbsnp.gor | write -link dbsnp3.gord", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(linkFile.getLatestEntry().url().matches(".*?dbsnp3\\..*?\\.gord/")); + + String linkresult1 = TestUtils.runGorPipe("gor dbsnp.gor| top 1000", "-gorroot", workDirPath.toString()); + String 
linkresult3 = TestUtils.runGorPipe("gor dbsnp3.gord | top 1000", "-gorroot", workDirPath.toString()); + Assert.assertEquals(linkresult1, linkresult3); + } + + @Test + public void testWriteLinkFileForGordFolderInferFilenameParallel() throws IOException { + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + TestUtils.runGorPipe("parallel -parts <(nor dbsnp.gor | select chrom | distinct) <(gor -p #{col:Chrom} dbsnp.gor) | write -link dbsnp3.gord", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(linkFile.getLatestEntry().url().matches(".*?dbsnp3\\..*?\\.gord/")); + + String linkresult1 = TestUtils.runGorPipe("gor dbsnp.gor | top 500", "-gorroot", workDirPath.toString()); + String linkresult3 = TestUtils.runGorPipe("gor dbsnp3.gord | top 500", "-gorroot", workDirPath.toString()); + Assert.assertEquals(linkresult1, linkresult3); + } + + @Test + public void testWriteLinkFileForForkWrite() throws IOException { + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + TestUtils.runGorPipe("gor dbsnp.gor | write -f chrom -link dbsnp-#{fork}-link.gor.link dbsnp-#{fork}-data.gor ", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp-chr1-link.gor.link").toString())); + + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertEquals("dbsnp-chr1-data.gor", PathUtils.getFileName(linkFile.getLatestEntry().url())); + + var countLinkFiles = Files.list(workDirPath).map(f -> f.getFileName().toString()).filter(f -> f.endsWith(".link")).count(); + Assert.assertEquals(24, countLinkFiles); + + } + + @Ignore + @Test + public void testWriteLinkFileForForkWriteWrongLinkName() throws IOException { + Path p = 
Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + TestUtils.runGorPipe("gor dbsnp.gor | write -f chrom -link dbsnp-link.gor.link dbsnp-#{fork}-data.gor ", "-gorroot", workDirPath.toString()); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertEquals("dbsnp-chr1-data.gor", linkFile.getLatestEntry().url()); + + var countLinkFiles = Files.list(workDirPath).filter(f -> f.endsWith(".link")).count(); + Assert.assertEquals(10, countLinkFiles); + } + @Test public void testTxtWriteServer() throws IOException { Path p = Paths.get("../tests/data/nor/simple.nor"); diff --git a/gortools/src/test/java/gorsat/UTestNorDictFile.java b/gortools/src/test/java/gorsat/UTestNorDictFile.java index 578eeac8..e56349cd 100644 --- a/gortools/src/test/java/gorsat/UTestNorDictFile.java +++ b/gortools/src/test/java/gorsat/UTestNorDictFile.java @@ -56,6 +56,7 @@ public void loadGenericNordDict() throws IOException { var nordDict = new NorDictionaryTable( Path.of(path, "test.nord").toString(), ProjectContext.DEFAULT_READER); Assert.assertEquals(10, nordDict.getEntries().size()); Assert.assertEquals("phenotype", nordDict.getSourceColumn()); + Assert.assertEquals(100, TestUtils.runGorPipeCount("nor " + Path.of(path, "test.nord"))); } @Test() diff --git a/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java b/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java index 460cc110..4bda4606 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java +++ b/model/src/main/java/org/gorpipe/gor/driver/GorDriverConfig.java @@ -182,11 +182,11 @@ public interface GorDriverConfig extends Config { @DefaultValue("plink2") String plinkExecutable(); - String GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL = "GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL"; + String GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL = 
"GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL"; @Documentation("Root location of managed data files for link files.") - @Key(GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL) + @Key(GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL) @DefaultValue("") - String managedLinkDataFilesURL(); + String managedLinkDataRootURL(); String GOR_DRIVER_LINK_INFER_REPLACE = "GOR_DRIVER_LINK_INFER_REPLACE"; @Documentation("Replacement patterns when inferring link file paths ([;]).") diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index 9db557b6..e0f439f7 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -5,14 +5,11 @@ import java.io.OutputStream; import java.util.List; import java.util.concurrent.TimeUnit; - -import org.apache.commons.lang3.RandomStringUtils; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import com.google.common.util.concurrent.UncheckedExecutionException; import org.gorpipe.exceptions.GorResourceException; -import org.gorpipe.gor.driver.GorDriverConfig; import org.gorpipe.gor.driver.meta.SourceReference; import org.gorpipe.gor.driver.providers.stream.StreamUtils; import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; @@ -27,7 +24,7 @@ * Link file format, a valid nor format. 
Example: * * ## VERSION= - * ## SERIAl= + * ## SERIAl= * ## ENTRIES_COUNT_MAX= * ## ENTRIES_AGE_MAX= * # FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO @@ -93,11 +90,11 @@ public static LinkFile loadV1(StreamSource source) throws IOException { return new LinkFileV1(source, LinkFileMeta.createOrLoad(content, LinkFileV1.VERSION, true), content); } - public static LinkFile createV0(StreamSource source, String content) throws IOException { + public static LinkFile createV0(StreamSource source, String content) { return new LinkFileV0(source, LinkFileMeta.createOrLoad(content, LinkFileV0.VERSION, true), content); } - public static LinkFile createV1(StreamSource source, String content) throws IOException { + public static LinkFile createV1(StreamSource source, String content) { return new LinkFileV1(source, LinkFileMeta.createOrLoad(content, LinkFileV1.VERSION, true), content); } @@ -109,75 +106,7 @@ public static String validateAndUpdateLinkFileName(String linkFilePath) { } } - /** - * Infer the data file name from the link file name. - * - * @param linkSource the link file path with the link extension - * @param linkFileMeta additional link file meta data - * @return the data file path - */ - public static String inferDataFileNameFromLinkFile(StreamSource linkSource, String linkFileMeta) throws IOException { - if (linkSource == null || Strings.isNullOrEmpty(linkSource.getFullPath())) { - throw new IllegalArgumentException("Link file path is null or empty. Can not infer data file name."); - } - - var linkPath = linkSource.getSourceReference().getUrl(); - - // Remove common the root if set. - var pathReplacements = System.getenv("GOR_DRIVER_LINK_INFER_REPLACE"); - if (!Strings.isNullOrEmpty(pathReplacements)) { - var parts = pathReplacements.split(";", 2); - linkPath = linkPath.replaceAll(parts[0], parts.length > 1 ? parts[1] : ""); - } - - // Adjust link path so it suitable as part of data file path. 
- if (PathUtils.isAbsolutePath(linkPath)) { - throw new IllegalArgumentException("Link file path is absolute and gor.driver.link.common.root is not set. Can not infer data file name: " + linkSource.getFullPath()); - } - - var dataFileRootPath = ""; - - // Get root from the link file - var link = linkSource.exists() - ? load(linkSource).appendMeta(linkFileMeta) - : create(linkSource, linkFileMeta); - var linkDataFileRootPath = link.getMeta().getProperty(LinkFileMeta.HEADER_CONTENT_LOCATION_MANAGED_KEY); - if (!Strings.isNullOrEmpty(linkDataFileRootPath)) { - dataFileRootPath = linkDataFileRootPath; - } - - // Get root from global const - if (Strings.isNullOrEmpty(dataFileRootPath)) { - dataFileRootPath = System.getenv(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL); - } - - if (Strings.isNullOrEmpty(dataFileRootPath)) { - throw new IllegalArgumentException("Link file data root path is not set. Can not infer data file name from link file: " + linkSource.getFullPath()); - } - - // Create file name - String randomString = RandomStringUtils.random(8, true, true); - var linkPathSplit = linkPath.indexOf('.'); - if (linkPathSplit > 0) { - linkPath = "%s.%s.%s".formatted( - linkPath.substring(0, linkPathSplit), - randomString, - linkPath.substring(linkPathSplit + 1)); - } else { - linkPath = "%s.%s".formatted(linkPath, randomString); - } - - linkPath = linkPath.replaceAll("\\.link$", ""); - // Insert project - var project = linkSource.getSourceReference().getCommonRoot() != null - ? 
PathUtils.getFileName(linkSource.getSourceReference().getCommonRoot()) : ""; - if (!Strings.isNullOrEmpty(project)) { - dataFileRootPath = PathUtils.resolve(dataFileRootPath, project); - } - - return PathUtils.resolve(dataFileRootPath, linkPath); - } protected final StreamSource source; protected final LinkFileMeta meta; @@ -213,7 +142,7 @@ private String getUrlFromEntry(LinkFileEntry entry) { linkUrl = PathUtils.resolve(PathUtils.getParent(this.source.getFullPath()), linkUrl); } - // Handle link sub-path if needed. + // Handle the link sub-path if needed. SourceReference sourceReference = source.getSourceReference(); if (sourceReference != null) { String linkSubPath = sourceReference.getLinkSubPath(); @@ -311,7 +240,7 @@ public boolean rollbackLatestEntry() { /** * Remove entries that are newer than the provided timestamp. * - * @param timestamp the timestamp to rollback to (inclusive) + * @param timestamp the timestamp to roll back to (inclusive) * @return true if one or more entries were removed, otherwise false. */ public boolean rollbackToTimestamp(long timestamp) { @@ -336,7 +265,7 @@ public void save(long timestamp) { } private void save(OutputStream os, long timestamp) { - meta.setProperty(meta.HEADER_SERIAL_KEY, Integer.toString(Integer.parseInt(meta.getProperty(meta.HEADER_SERIAL_KEY, "0")) + 1)); + meta.setProperty(LinkFileMeta.HEADER_SERIAL_KEY, Integer.toString(Integer.parseInt(meta.getProperty(LinkFileMeta.HEADER_SERIAL_KEY, "0")) + 1)); var content = new StringBuilder(getHeader()); @@ -359,7 +288,7 @@ private void save(OutputStream os, long timestamp) { /** - * Load content from the source, if it exists. + * Load content from the source if it exists. * * @param source the source to load from * @return the content of the link file or null if it does not exist (empty indicates version 0 link file). 
diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java index 66ef262e..7a9c60b7 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java @@ -2,7 +2,6 @@ import org.apache.commons.lang3.StringUtils; import org.gorpipe.gor.model.BaseMeta; -import org.gorpipe.gor.model.FileReader; import org.gorpipe.util.Strings; import java.util.stream.Collectors; @@ -13,10 +12,10 @@ public class LinkFileMeta extends BaseMeta { public static final String HEADER_ENTRIES_COUNT_MAX_KEY = "ENTRIES_COUNT_MAX"; // Max age of entries to keep track of in the link file. public static final String HEADER_ENTRIES_AGE_MAX_KEY = "ENTRIES_AGE_MAX"; - // Path if the managed content data location. - public static final String HEADER_CONTENT_LOCATION_MANAGED_KEY = "CONTENT_LOCATION_MANAGED"; + // Path if the managed data location. + public static final String HEADER_DATA_LOCATION_KEY = "DATA_LOCATION"; // Should the content lifecycle be managed (data deleted if the link is removed from the link file) (true or false). 
- public static final String HEADER_CONTENT_LIFECYCLE_MANAGED_KEY = "CONTENT_LIFECYCLE_MANAGED"; + public static final String HEADER_DATA_LIFECYCLE_MANAGED_KEY = "DATA_LIFECYCLE_MANAGED"; private static final String DEFAULT_VERSION = System.getProperty("gor.driver.link.default.version", "1"); diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java new file mode 100644 index 00000000..ece1da2b --- /dev/null +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java @@ -0,0 +1,164 @@ +package org.gorpipe.gor.driver.linkfile; + +import gorsat.Commands.CommandParseUtilities; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.gorpipe.gor.driver.GorDriverConfig; +import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; +import org.gorpipe.gor.model.DriverBackedFileReader; +import org.gorpipe.gor.model.FileReader; +import org.gorpipe.gor.table.util.PathUtils; +import org.gorpipe.util.Strings; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class LinkFileUtil { + + /** + * Infer the data file name from the link file name. + * + * @param linkSource the link file path with the link extension + * @param linkFileMeta additional link file meta data + * @return the data file path + */ + public static String inferDataFileNameFromLinkFile(StreamSource linkSource, String linkFileMeta) throws IOException { + if (linkSource == null || Strings.isNullOrEmpty(linkSource.getFullPath())) { + throw new IllegalArgumentException("Link file path is null or empty. Can not infer data file name."); + } + + var linkPath = linkSource.getSourceReference().getUrl(); + + // Remove common the root if set. 
+ var pathReplacements = System.getenv("GOR_DRIVER_LINK_INFER_REPLACE"); + if (!Strings.isNullOrEmpty(pathReplacements)) { + var parts = pathReplacements.split(";", 2); + linkPath = linkPath.replaceAll(parts[0], parts.length > 1 ? parts[1] : ""); + } + + // Adjust the link path so it suitable as part of the data file path. + if (PathUtils.isAbsolutePath(linkPath)) { + throw new IllegalArgumentException("Link file path is absolute. Can not infer data file name: " + linkSource.getFullPath()); + } + + var dataFileRootPath = ""; + + // Get root from the link file + var link = linkSource.exists() + ? LinkFile.load(linkSource).appendMeta(linkFileMeta) + : LinkFile.create(linkSource, linkFileMeta); + + var linkDataFileRootPath = link.getMeta().getProperty(LinkFileMeta.HEADER_DATA_LOCATION_KEY); + if (!Strings.isNullOrEmpty(linkDataFileRootPath)) { + dataFileRootPath = linkDataFileRootPath; + } else if (link.getLatestEntry() != null) { + dataFileRootPath = PathUtils.getParent(link.getLatestEntryUrl()); + } + + if (!Strings.isNullOrEmpty(linkDataFileRootPath)) { + dataFileRootPath = linkDataFileRootPath; + } + + // Get root from global const + if (Strings.isNullOrEmpty(dataFileRootPath)) { + dataFileRootPath = System.getenv(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL); + + // Insert project, only if we use global and global is set + if (!Strings.isNullOrEmpty(dataFileRootPath)) { + var project = linkSource.getSourceReference().getCommonRoot() != null + ? 
PathUtils.getFileName(linkSource.getSourceReference().getCommonRoot()) : ""; + if (!Strings.isNullOrEmpty(project)) { + dataFileRootPath = PathUtils.resolve(dataFileRootPath, project); + } + } + } + + // Create a file name + String uniqId = RandomStringUtils.insecure().next(8, true, true); + var linkPathSplit = linkPath.indexOf('.'); + if (linkPathSplit > 0) { + linkPath = "%s.%s.%s".formatted( + linkPath.substring(0, linkPathSplit), + uniqId, + linkPath.substring(linkPathSplit + 1)); + } else { + linkPath = "%s.%s".formatted(linkPath, uniqId); + } + + linkPath = linkPath.replaceAll("\\.link$", ""); + + return PathUtils.resolve(dataFileRootPath, linkPath); + } + + + private static Pattern linkPattern = Pattern.compile(".* -link ([^\\s]*) ?.*", Pattern.CASE_INSENSITIVE); + private static Pattern linkMetaPattern = Pattern.compile(".* -linkMeta [\"']([^\\s]*)[\"'] ?.*", Pattern.CASE_INSENSITIVE); + + public static String extractLinkOptionData(String options) { + Matcher matcher = linkPattern.matcher(options); + if (matcher.matches()) { + return matcher.group(1); + } + return ""; + } + + public static String extractLinkMetaOptionData(String options) { + Matcher matcher = linkMetaPattern.matcher(options); + if (matcher.matches()) { + return matcher.group(1); + } + return ""; + } + + public record LinkData(String linkFile, String linkFileContent, String linkFileMeta, String linkFileInfo, String md5) {} + + public static LinkData extractLink(FileReader fileReader, String source, String optLinkFile, String optLinkFileMeta, String md5) { + var linkFile = LinkFile.validateAndUpdateLinkFileName(optLinkFile); + var linkFileContent = !Strings.isNullOrEmpty(linkFile) ? 
PathUtils.resolve(fileReader.getCommonRoot(), source) : ""; + + if (Strings.isNullOrEmpty(linkFile) && !Strings.isNullOrEmpty(source)) { + // Check if link file is forced from the source + var dataSource = fileReader.resolveUrl(source, true); + if (dataSource != null && dataSource.forceLink()) { + linkFile = dataSource.getProjectLinkFile(); + linkFileContent = dataSource.getProjectLinkFileContent(); + } + } + var metaInfo = extractLinkMetaInfo(optLinkFileMeta); + return new LinkData(linkFile, linkFileContent, metaInfo.linkFileMeta, metaInfo.linkFileInfo, md5); + } + + public static LinkData extractLinkMetaInfo(String optLinkFileMeta) { + var linkFileMeta = ""; + var linkFileInfo = ""; + + if (!Strings.isNullOrEmpty(optLinkFileMeta)) { + for (String s : CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',')) { + var l = s.trim(); + if (l.startsWith(LinkFileEntryV1.ENTRY_INFO_KEY)) { + linkFileInfo = StringUtils.strip(l.substring(LinkFileEntryV1.ENTRY_INFO_KEY.length() + 1), "\"\'"); + } else { + linkFileMeta += "## " + l + "\n"; + } + } + } + + return new LinkData("", "", linkFileMeta, linkFileInfo, ""); + } + + public static void writeLinkFile(FileReader fileReader, LinkData linkData) throws IOException { + // Validate that we can write to the location (skip link extension as writing links is always forbidden). + fileReader.resolveUrl(FilenameUtils.removeExtension(linkData.linkFile), true); + + // Use the nonsecure driver file reader as this is an exception from the write no links rule. 
+ var unsecureFileReader = new DriverBackedFileReader(fileReader.getSecurityContext(), + fileReader.getCommonRoot(), fileReader.getQueryTime()); + + LinkFile.load((StreamSource)unsecureFileReader.resolveUrl(linkData.linkFile, true)) + .appendMeta(linkData.linkFileMeta) + .appendEntry(linkData.linkFileContent, linkData.md5, linkData.linkFileInfo, fileReader) + .save(fileReader.getQueryTime()); + } +} diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java index 193e1e8e..a4067946 100644 --- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java @@ -14,11 +14,8 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import java.lang.reflect.Field; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; -import java.util.Map; import static org.junit.Assert.*; import static org.mockito.Mockito.*; @@ -197,26 +194,27 @@ public void testSaveLinkFileV1ToV0() throws IOException { @Test(expected = IllegalArgumentException.class) public void testInferDataFileNameFromLinkFile_NullOrEmptyPath() throws Exception { - LinkFile.inferDataFileNameFromLinkFile(new FileSource(""), null); + LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(""), null); } @Test(expected = IllegalArgumentException.class) public void testInferDataFileNameFromLinkFile_AbsolutePath() throws Exception { - LinkFile.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.link"), null); + LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.link"), null); } - @Test(expected = IllegalArgumentException.class) + @Test public void testInferDataFileNameFromLinkFile_NoRootConfigured() throws Exception { - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, null); - LinkFile.inferDataFileNameFromLinkFile(new FileSource("x.link"), null); 
+ environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, null); + var ret = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("x.link"), null); + assertTrue(ret.startsWith("x.")); } @Test public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithProject() throws Exception { String root = "/managed/root"; - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, root); + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); - String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource(new SourceReference("x.gor.link", null, "/projects/test", -1, null, null, false, false)), null); + String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference("x.gor.link", null, "/projects/test", -1, null, null, false, false)), null); assertNotNull(result); assertTrue(result.matches((root + "/test/x\\..*\\.gor").replace("/", "\\/"))); } @@ -224,9 +222,9 @@ public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithProject() thro @Test public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithOutProject() throws Exception { String root = "/managed/root"; - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, root); + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); - String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource("x.gor.link"), null); + String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("x.gor.link"), null); assertTrue(result.matches((root + "/x\\..*\\.gor").replace("/", "\\/"))); } @@ -235,20 +233,20 @@ public void testInferDataFileNameFromLinkFile_FromExiting_File() throws Exceptio String root = "/managed/fromfile"; String linkFilePath = "x.gor.link"; Files.createDirectory(workPath.resolve("test")); - Files.writeString(workPath.resolve("test").resolve(linkFilePath), "## " + 
LinkFileMeta.HEADER_CONTENT_LOCATION_MANAGED_KEY + " = " + root + "\nsource/y.gorz\n"); + Files.writeString(workPath.resolve("test").resolve(linkFilePath), "## " + LinkFileMeta.HEADER_DATA_LOCATION_KEY + " = " + root + "\nsource/y.gorz\n"); - String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath, null, workPath.resolve("test").toString(), -1, null, null, false, false)), null); + String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath, null, workPath.resolve("test").toString(), -1, null, null, false, false)), null); assertNotNull(result); - assertTrue(result.matches((root + "/test/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/x\\..*\\.gor").replace("/", "\\/"))); } @Test public void testInferDataFileNameFromLinkFile_FromMetaParam() throws Exception { String root = "/managed/fromparam"; String linkFilePath = "x.gor.link"; - String linkFileMeta = "## " + LinkFileMeta.HEADER_CONTENT_LOCATION_MANAGED_KEY + " = " + root; + String linkFileMeta = "## " + LinkFileMeta.HEADER_DATA_LOCATION_KEY + " = " + root; - String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); + String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); assertNotNull(result); assertTrue(result.matches((root + "/x\\..*\\.gor").replace("/", "\\/"))); } @@ -257,12 +255,12 @@ public void testInferDataFileNameFromLinkFile_FromMetaParam() throws Exception { public void testInferDataFileNameFromLinkFile_FromMetaParam_ExistingFile() throws Exception { String fileroot = "/managed/fromfile"; String linkFilePath = "x.gor.link"; - Files.writeString(workPath.resolve(linkFilePath), "## " + LinkFileMeta.HEADER_CONTENT_LOCATION_MANAGED_KEY + " = " + fileroot + "\nsource/y.gorz\n"); + Files.writeString(workPath.resolve(linkFilePath), "## " + 
LinkFileMeta.HEADER_DATA_LOCATION_KEY + " = " + fileroot + "\nsource/y.gorz\n"); String paramroot = "/managed/fromparam"; - String linkFileMeta = "## " + LinkFileMeta.HEADER_CONTENT_LOCATION_MANAGED_KEY + " = " + paramroot; + String linkFileMeta = "## " + LinkFileMeta.HEADER_DATA_LOCATION_KEY + " = " + paramroot; - String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); + String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); assertNotNull(result); assertTrue(result.matches((paramroot + "/x\\..*\\.gor").replace("/", "\\/"))); } @@ -270,10 +268,10 @@ public void testInferDataFileNameFromLinkFile_FromMetaParam_ExistingFile() throw @Test public void testInferDataFileNameFromLinkFile_PathReplace() throws Exception { String root = "/managed/root"; - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, root); + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_INFER_REPLACE, "wont;will"); - String result = LinkFile.inferDataFileNameFromLinkFile(new FileSource("wont/x.gor.link"), null); + String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("wont/x.gor.link"), null); assertNotNull(result); assertTrue(result.matches((root + "/will/x\\..*\\.gor").replace("/", "\\/"))); @@ -282,10 +280,10 @@ public void testInferDataFileNameFromLinkFile_PathReplace() throws Exception { @Test public void testInferDataFileNameFromLinkFile_AbsolutePathReplace() throws Exception { String root = "/managed/root"; - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_FILES_URL, root); + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_INFER_REPLACE, "\\/abs\\/"); - String result = 
LinkFile.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.gor.link"), null); + String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.gor.link"), null); assertNotNull(result); assertTrue(result.matches((root + "/path/x\\..*\\.gor").replace("/", "\\/"))); From 66af80117251722b3bd5613d706626feaee920d0 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 21 Dec 2025 23:06:27 +0000 Subject: [PATCH 04/18] fat(ENGKNOW-2781): Minor refactoring. Cache temp file name for FileTable. --- .../main/java/org/gorpipe/gor/table/files/FileTable.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gortools/src/main/java/org/gorpipe/gor/table/files/FileTable.java b/gortools/src/main/java/org/gorpipe/gor/table/files/FileTable.java index 69123cf5..4927eefb 100644 --- a/gortools/src/main/java/org/gorpipe/gor/table/files/FileTable.java +++ b/gortools/src/main/java/org/gorpipe/gor/table/files/FileTable.java @@ -82,17 +82,18 @@ private void init() { support = new TableTwoPhaseCommitSupport(this) { @Override public void saveTempMainFile() { + var tempMainFileName = getTempMainFileName(); // Move our temp file to the standard temp file and clean up. // or if these are links update the link file to point to the new temp file. 
// Clean up (remove old files and temp files) s - log.debug(String.format("Saving temp file (%s)to temp main file (%s) ", tempOutFilePath, getTempMainFileName())); + log.debug(String.format("Saving temp file (%s)to temp main file (%s) ", tempOutFilePath, tempMainFileName)); try { if (tempOutFilePath != null && getFileReader().exists(tempOutFilePath.toString())) { - updateFromTempFile(tempOutFilePath.toString(), getTempMainFileName()); + updateFromTempFile(tempOutFilePath.toString(), tempMainFileName); tempOutFilePath = null; getFileReader().deleteDirectory(getTransactionFolderPath().toString()); } else if (!getFileReader().exists(getPath().toString())) { - writeToFile(Path.of(getTempMainFileName()), new ArrayList<>()); + writeToFile(Path.of(tempMainFileName), new ArrayList<>()); } } catch (IOException e) { throw new GorSystemException("Could not save table", e); From 9cefa583afdb46f296f98afa0ce81628a3df00b4 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 4 Jan 2026 17:43:16 +0000 Subject: [PATCH 05/18] feat(ENGKNOW-2781): Update dict link file support. 
--- .../scala/gorsat/Analysis/ForkWrite.scala | 9 +- .../src/test/java/gorsat/UTestPartGor.java | 12 ++ .../gor/driver/linkfile/LinkFileUtil.java | 75 ++++----- .../stream/sources/file/FileSourceType.java | 2 +- .../gor/table/livecycle/TableInfoBase.java | 54 +++++-- .../livecycle/TableLifeCycleSupport.java | 5 +- .../livecycle/TableTwoPhaseCommitSupport.java | 31 +++- .../org/gorpipe/gor/table/util/PathUtils.java | 4 + .../gor/driver/linkfile/LinkFileTest.java | 29 +--- .../UTestGorDictionaryTableVersioned.java | 147 ++++++++++++++++++ 10 files changed, 269 insertions(+), 99 deletions(-) create mode 100644 model/src/test/java/org/gorpipe/gor/table/UTestGorDictionaryTableVersioned.java diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala index bf2e2b7a..efa2103e 100644 --- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala +++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala @@ -28,15 +28,13 @@ import gorsat.Outputs.OutFile import org.apache.commons.io.FilenameUtils import org.gorpipe.exceptions.GorResourceException import org.gorpipe.gor.binsearch.GorIndexType -import org.gorpipe.gor.driver.linkfile.{LinkFile, LinkFileEntryV1} +import org.gorpipe.gor.driver.linkfile.LinkFileUtil import org.gorpipe.gor.driver.meta.DataType -import org.gorpipe.gor.driver.providers.stream.sources.StreamSource import org.gorpipe.gor.model.Row import org.gorpipe.gor.session.{GorSession, ProjectContext} import org.gorpipe.gor.table.util.PathUtils import org.gorpipe.gor.util.DataUtil import org.gorpipe.model.gor.RowObj -import org.gorpipe.util.Strings import org.slf4j.{Logger, LoggerFactory} import java.util.UUID @@ -341,7 +339,8 @@ case class ForkWrite(forkCol: Int, }) } - if (options.linkFile.nonEmpty) { + // Only write links for files that are NOT inside gord + if (options.useFolder.isEmpty && !singleFileHolder.fileName.contains(".gord/")) { if (useFork) { forkMap.values.foreach(sh => { val 
linkData = LinkFileUtil.extractLink(session.getProjectContext.getFileReader, sh.fileName, @@ -351,7 +350,7 @@ case class ForkWrite(forkCol: Int, LinkFileUtil.writeLinkFile(session.getProjectContext.getFileReader, linkData) } }) - } else if (options.useFolder.isEmpty && !singleFileHolder.fileName.contains(".gord/")) { + } else { val linkData = LinkFileUtil.extractLink(session.getProjectContext.getFileReader, singleFileHolder.fileName, options.linkFile, options.linkFileMeta, getMd5) if (linkData.linkFile().nonEmpty) { diff --git a/gortools/src/test/java/gorsat/UTestPartGor.java b/gortools/src/test/java/gorsat/UTestPartGor.java index 5979af00..240ff87c 100644 --- a/gortools/src/test/java/gorsat/UTestPartGor.java +++ b/gortools/src/test/java/gorsat/UTestPartGor.java @@ -309,4 +309,16 @@ public void testPartGorMixedTagsReplacement() { Assert.assertEquals("a,b,c | 'a','b','c' | \"a\",\"b\",\"c\"", result); } + @Test + public void partGorWithFileName() throws IOException { + String contents = "#col1\tcol2\tcol3\tcol4\tcol5\tcol6\tlis_PN\n" + + "data/bucket_1.gorz\tbucket_1\tchr1\t0\tchrZ\t1000000000\tSAMPLE_SIM842_000001\n" + + "data/bucket_10.gorz\tbucket_10\tchr1\t0\tchrZ\t1000000000\tSAMPLE_SIM842_000010\n"; + + File dictFile = FileTestUtils.createTempFile(workDir.getRoot(), "variants.gord", contents); + + String query = "partgor -dict " + dictFile.getAbsolutePath() + " <(gorrow chr1,1,1 | calc x \"#{tags:q}\" | calc fn 's3://test/test.gor')"; + String results = TestUtils.runGorPipe(query); + } + } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java index ece1da2b..f8fdb3ea 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java @@ -2,11 +2,9 @@ import gorsat.Commands.CommandParseUtilities; import org.apache.commons.io.FilenameUtils; -import 
org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; import org.gorpipe.gor.driver.GorDriverConfig; import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; -import org.gorpipe.gor.model.DriverBackedFileReader; import org.gorpipe.gor.model.FileReader; import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.util.Strings; @@ -29,70 +27,43 @@ public static String inferDataFileNameFromLinkFile(StreamSource linkSource, Stri throw new IllegalArgumentException("Link file path is null or empty. Can not infer data file name."); } - var linkPath = linkSource.getSourceReference().getUrl(); - - // Remove common the root if set. - var pathReplacements = System.getenv("GOR_DRIVER_LINK_INFER_REPLACE"); - if (!Strings.isNullOrEmpty(pathReplacements)) { - var parts = pathReplacements.split(";", 2); - linkPath = linkPath.replaceAll(parts[0], parts.length > 1 ? parts[1] : ""); - } - - // Adjust the link path so it suitable as part of the data file path. - if (PathUtils.isAbsolutePath(linkPath)) { - throw new IllegalArgumentException("Link file path is absolute. Can not infer data file name: " + linkSource.getFullPath()); - } - - var dataFileRootPath = ""; + var dataFileParentPath = ""; // Get root from the link file var link = linkSource.exists() ? 
LinkFile.load(linkSource).appendMeta(linkFileMeta) : LinkFile.create(linkSource, linkFileMeta); - var linkDataFileRootPath = link.getMeta().getProperty(LinkFileMeta.HEADER_DATA_LOCATION_KEY); - if (!Strings.isNullOrEmpty(linkDataFileRootPath)) { - dataFileRootPath = linkDataFileRootPath; + var linkDataFileParentPath = link.getMeta().getProperty(LinkFileMeta.HEADER_DATA_LOCATION_KEY); + if (!Strings.isNullOrEmpty(linkDataFileParentPath)) { + dataFileParentPath = linkDataFileParentPath; } else if (link.getLatestEntry() != null) { - dataFileRootPath = PathUtils.getParent(link.getLatestEntryUrl()); + dataFileParentPath = PathUtils.getParent(link.getLatestEntryUrl()); } - if (!Strings.isNullOrEmpty(linkDataFileRootPath)) { - dataFileRootPath = linkDataFileRootPath; + if (!Strings.isNullOrEmpty(linkDataFileParentPath)) { + dataFileParentPath = linkDataFileParentPath; } // Get root from global const - if (Strings.isNullOrEmpty(dataFileRootPath)) { - dataFileRootPath = System.getenv(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL); + if (Strings.isNullOrEmpty(dataFileParentPath)) { + dataFileParentPath = System.getenv(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL); // Insert project, only if we use global and global is set - if (!Strings.isNullOrEmpty(dataFileRootPath)) { + if (!Strings.isNullOrEmpty(dataFileParentPath)) { var project = linkSource.getSourceReference().getCommonRoot() != null ? 
PathUtils.getFileName(linkSource.getSourceReference().getCommonRoot()) : ""; if (!Strings.isNullOrEmpty(project)) { - dataFileRootPath = PathUtils.resolve(dataFileRootPath, project); + dataFileParentPath = PathUtils.resolve(dataFileParentPath, project); } } } - // Create a file name - String uniqId = RandomStringUtils.insecure().next(8, true, true); - var linkPathSplit = linkPath.indexOf('.'); - if (linkPathSplit > 0) { - linkPath = "%s.%s.%s".formatted( - linkPath.substring(0, linkPathSplit), - uniqId, - linkPath.substring(linkPathSplit + 1)); - } else { - linkPath = "%s.%s".formatted(linkPath, uniqId); - } - - linkPath = linkPath.replaceAll("\\.link$", ""); + var dataFileName = PathUtils.injectRandomStringIntoFileName(PathUtils.getFileName(linkSource.getFullPath())); - return PathUtils.resolve(dataFileRootPath, linkPath); + return PathUtils.resolve(dataFileParentPath, dataFileName); } - private static Pattern linkPattern = Pattern.compile(".* -link ([^\\s]*) ?.*", Pattern.CASE_INSENSITIVE); private static Pattern linkMetaPattern = Pattern.compile(".* -linkMeta [\"']([^\\s]*)[\"'] ?.*", Pattern.CASE_INSENSITIVE); @@ -114,6 +85,17 @@ public static String extractLinkMetaOptionData(String options) { public record LinkData(String linkFile, String linkFileContent, String linkFileMeta, String linkFileInfo, String md5) {} + + /** + * Extract link data from a linkfile and link options. + * + * @param fileReader filereader + * @param source the link file source + * @param optLinkFile linkfile option string + * @param optLinkFileMeta linkfilemeta option string + * @param md5 md5 for the data file + * @return linkData record with link info. + */ public static LinkData extractLink(FileReader fileReader, String source, String optLinkFile, String optLinkFileMeta, String md5) { var linkFile = LinkFile.validateAndUpdateLinkFileName(optLinkFile); var linkFileContent = !Strings.isNullOrEmpty(linkFile) ? 
PathUtils.resolve(fileReader.getCommonRoot(), source) : ""; @@ -153,12 +135,11 @@ public static void writeLinkFile(FileReader fileReader, LinkData linkData) throw fileReader.resolveUrl(FilenameUtils.removeExtension(linkData.linkFile), true); // Use the nonsecure driver file reader as this is an exception from the write no links rule. - var unsecureFileReader = new DriverBackedFileReader(fileReader.getSecurityContext(), - fileReader.getCommonRoot(), fileReader.getQueryTime()); + var unsecureFileReader = fileReader.unsecure(); - LinkFile.load((StreamSource)unsecureFileReader.resolveUrl(linkData.linkFile, true)) + LinkFile.loadV1((StreamSource)unsecureFileReader.resolveUrl(linkData.linkFile, true)) .appendMeta(linkData.linkFileMeta) - .appendEntry(linkData.linkFileContent, linkData.md5, linkData.linkFileInfo, fileReader) - .save(fileReader.getQueryTime()); + .appendEntry(linkData.linkFileContent, linkData.md5, linkData.linkFileInfo, unsecureFileReader) + .save(unsecureFileReader.getQueryTime()); } } diff --git a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSourceType.java b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSourceType.java index 536db3cd..c0a5b935 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSourceType.java +++ b/model/src/main/java/org/gorpipe/gor/driver/providers/stream/sources/file/FileSourceType.java @@ -40,6 +40,6 @@ public PRIORITY getPriority() { @Override public boolean match(String file) { // TODO: Until we get better matching strategy we must exclude mem here. 
- return !DataUtil.isMem(file) && (file.startsWith("file:") || !file.contains(":/")); + return !DataUtil.isMem(file) && (file.startsWith("file:") || !file.matches("^\\p{Alnum}+:/.*")); } } diff --git a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableInfoBase.java b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableInfoBase.java index 815f1374..d6bbcc8e 100644 --- a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableInfoBase.java +++ b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableInfoBase.java @@ -36,15 +36,18 @@ public abstract class TableInfoBase implements TableInfo { private static final Logger log = LoggerFactory.getLogger(TableInfoBase.class); + public static boolean USE_LINKS = Boolean.parseBoolean(System.getProperty("gor.table.use.links", "false")); + protected static final boolean FORCE_SAME_COLUMN_NAMES = Boolean.parseBoolean(System.getProperty("gor.table.validate.columnNames", "true")); public static final String HISTORY_DIR_NAME = "history"; - private final String path; // Path to the table (currently absolute instead of real for compatibility with older code). + private String path; // Path to the tables gord file (currently absolute instead of real for compatibility with older code). private final String folderPath; // Path to the table folder. The table folder is hidden folder that sits next to the // table and contains various files related to it. + private final String linkPath; // Path to the link file. private final String rootUri; // uri to table root (just to improve performance when working with uri's). - private final String name; // Name of the table. - protected String id = null; // Unique id (based on full path (and possibly timestamp), just so we don't always have to refer to full path). + private String name; // Name of the table. + protected String id = null; // Unique id (based on full path (and possibly timestamp), just so we don't always have to refer to full path). 
protected TableHeader header; // Header info. @@ -56,7 +59,7 @@ public abstract class TableInfoBase implements TableInfo { /** * Main constructor. * - * @param uri path to the dictionary file. + * @param uri path to the dictionary file or folder. */ protected TableInfoBase(String uri, FileReader inputFileReader, TableHeader header) { this.header = header; @@ -69,22 +72,34 @@ protected TableInfoBase(String uri, FileReader inputFileReader, TableHeader head var fileName = PathUtils.getFileName(source.getFullPath()); this.name = FilenameUtils.removeExtension(fileName); - if (GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME.equals(fileName)) { - // thedict passed in (gord folder content) - this.rootUri = normalize(PathUtils.getParent(realUri)); - this.path = PathUtils.resolve(rootUri, GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME); + if (safeCheckExists(PathUtils.resolve(realUri, DataUtil.toLink(GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME)))) { + // Not all data sources support isDirectory (so just check for the dict file) + // GORDFOLDER VERSIONED: with folder (containing versioned link file) passed in. + this.rootUri = realUri; this.folderPath = rootUri; + this.linkPath = PathUtils.resolve(rootUri, DataUtil.toLink(GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME)); + try (var linkFileSource = fileReader.resolveUrl(linkPath)) { + this.path = linkFileSource != null ? linkFileSource.getFullPath() : getNewVersionedFileName(); + } } else if (safeCheckExists(PathUtils.resolve(realUri, GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME))) { // Not all data sources support isDirectory (so just check for the dict file) - // Gord folder passed in. + // GORDFOLDER: with std thedict passed in (gord folder). 
this.rootUri = realUri; - this.path = PathUtils.resolve(rootUri, GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME); this.folderPath = rootUri; - } else { - // Old school dict or file + this.path = PathUtils.resolve(rootUri, GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME); + this.linkPath = null; + } else if (safeCheckExists(realUri) || (!PathUtils.isMarkedAsFolder(realUri) && !USE_LINKS)) { + // STD GORD: Existing old school dict file OR new dict (not folder and not with USE_LINKS=true). this.rootUri = normalize(PathUtils.getParent(realUri)); - this.path = PathUtils.resolve(rootUri, fileName); this.folderPath = PathUtils.resolve(rootUri, "." + this.name); + this.linkPath = null; + this.path = PathUtils.resolve(rootUri, fileName); + } else { + // GORDFOLDER VERSIONED: New dict with USE_LINKS=true (versioned). + this.rootUri = realUri; + this.folderPath = rootUri; + this.linkPath = PathUtils.resolve(rootUri, DataUtil.toLink(GorOptions.DEFAULT_FOLDER_DICTIONARY_NAME)); + this.path = getNewVersionedFileName(); } } @@ -98,6 +113,10 @@ public String getPath() { return this.path; } + protected void setPath(String path) { + this.path = path; + } + @Override public String getRootPath() { return rootUri; @@ -108,6 +127,10 @@ public String getFolderPath() { return folderPath; } + public String getLinkPath() { + return linkPath; + } + @Override public String[] getColumns() { return this.header.getColumns(); @@ -326,4 +349,9 @@ protected TableHeader parseHeaderFromFile(String file) { return newHeader; } + + protected String getNewVersionedFileName() { + return PathUtils.resolve(getFolderPath(), + PathUtils.injectRandomStringIntoFileName("version" + DataType.GORD.suffix)); + } } diff --git a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableLifeCycleSupport.java b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableLifeCycleSupport.java index 507ab062..a6725f7d 100644 --- a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableLifeCycleSupport.java +++ 
b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableLifeCycleSupport.java @@ -4,6 +4,7 @@ import org.gorpipe.gor.table.TableHeader; import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.gor.table.util.TableLog; +import org.gorpipe.util.Strings; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,7 +76,9 @@ public void initialize() { table.header.setProperty(TableHeader.HEADER_CREATED_KEY, new SimpleDateFormat("yyyy-MM-dd HH:mm").format(new Date())); } - table.getFileReader().createDirectories(table.getFolderPath(), PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))); + if (!Strings.isNullOrEmpty(table.getFolderPath())) { + table.getFileReader().createDirectories(table.getFolderPath(), PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))); + } if (table.isUseHistory()) { table.getFileReader().createDirectories(tableLog.getLogDir(), PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))); diff --git a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java index af847df7..23d27fc0 100644 --- a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java +++ b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java @@ -1,6 +1,10 @@ package org.gorpipe.gor.table.livecycle; import org.gorpipe.exceptions.GorSystemException; +import org.gorpipe.gor.driver.linkfile.LinkFile; +import org.gorpipe.gor.driver.meta.DataType; +import org.gorpipe.gor.driver.meta.SourceReference; +import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; import org.gorpipe.gor.table.TableHeader; import org.gorpipe.gor.table.util.PathUtils; import org.slf4j.Logger; @@ -41,10 +45,27 @@ public void commitRequest() { @Override public void commit() { try { - if (!table.isUseEmbeddedHeader()) { - 
updateFromTempFile(getTempMetaFileName(), table.getMetaPath()); + if (this.table.getLinkPath() != null || TableInfoBase.USE_LINKS) { + var newVersionPath = PathUtils.resolve(table.getFolderPath(), table.getNewVersionedFileName()); + + if (!table.isUseEmbeddedHeader()) { + updateFromTempFile(getTempMetaFileName(), + newVersionPath + DataType.META.suffix); + } + updateFromTempFile(getTempMainFileName(), newVersionPath); + this.table.setPath(newVersionPath); + + LinkFile.load((StreamSource) table.fileReader.resolveDataSource(new SourceReference(table.getLinkPath()))) + .appendEntry(table.getPath(), "") + .save(); + + } else { + if (!table.isUseEmbeddedHeader()) { + updateFromTempFile(getTempMetaFileName(), table.getMetaPath()); + } + updateFromTempFile(getTempMainFileName(), table.getPath()); } - updateFromTempFile(getTempMainFileName(), table.getPath()); + } catch (IOException e) { throw new GorSystemException("Could not commit " + table.getPath(), e); } @@ -84,7 +105,7 @@ protected String getTempMainFileName() { } protected String getTempMetaFileName() { - return getTempFileName(table.getMetaPath()); + return getTempMainFileName() + DataType.META.suffix; } protected String getTempFileName(String pathString) { @@ -95,7 +116,7 @@ protected String getTempFileName(String pathString) { private String insertTableFolderIntoFilePath(String pathString) { String fileName = PathUtils.getFileName(pathString); - return PathUtils.resolve(table.getFolderPath(), (fileName)); + return PathUtils.resolve(table.getFolderPath(), fileName); } private String insertTempIntoFileName(String pathString) { diff --git a/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java b/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java index 1c0f76c5..daf9ebea 100644 --- a/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java +++ b/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java @@ -246,6 +246,10 @@ public static URI markAsFolder(URI path) { return path; } + public 
static boolean isMarkedAsFolder(String path) { + return path.endsWith("/"); + } + public static URI toRealPath(URI uri) { if (isLocal(uri)) { try { diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java index 82b2073e..dd2191d1 100644 --- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java @@ -194,18 +194,6 @@ public void testInferDataFileNameFromLinkFile_NullOrEmptyPath() throws Exception LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(""), null); } - @Test(expected = IllegalArgumentException.class) - public void testInferDataFileNameFromLinkFile_AbsolutePath() throws Exception { - LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.link"), null); - } - - @Test - public void testInferDataFileNameFromLinkFile_NoRootConfigured() throws Exception { - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, null); - var ret = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("x.link"), null); - assertTrue(ret.startsWith("x.")); - } - @Test public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithProject() throws Exception { String root = "/managed/root"; @@ -230,11 +218,11 @@ public void testInferDataFileNameFromLinkFile_FromExiting_File() throws Exceptio String root = "/managed/fromfile"; String linkFilePath = "x.gor.link"; Files.createDirectory(workPath.resolve("test")); - Files.writeString(workPath.resolve("test").resolve(linkFilePath), "## " + LinkFileMeta.HEADER_DATA_LOCATION_KEY + " = " + root + "\nsource/y.gorz\n"); + Files.writeString(workPath.resolve("test").resolve(linkFilePath), root + "/source/y.gorz\n"); String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath, null, workPath.resolve("test").toString(), -1, null, null, false, false)), null); 
assertNotNull(result); - assertTrue(result.matches((root + "/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/source/x\\..*\\.gor").replace("/", "\\/"))); } @Test @@ -266,19 +254,6 @@ public void testInferDataFileNameFromLinkFile_FromMetaParam_ExistingFile() throw public void testInferDataFileNameFromLinkFile_AbsolutePath() throws Exception { String root = "/managed/root"; environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_INFER_REPLACE, "wont;will"); - - String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("wont/x.gor.link"), null); - - assertNotNull(result); - assertTrue(result.matches((root + "/will/x\\..*\\.gor").replace("/", "\\/"))); - } - - @Test - public void testInferDataFileNameFromLinkFile_AbsolutePathReplace() throws Exception { - String root = "/managed/root"; - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); - environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_INFER_REPLACE, "\\/abs\\/"); String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.gor.link"), null); diff --git a/model/src/test/java/org/gorpipe/gor/table/UTestGorDictionaryTableVersioned.java b/model/src/test/java/org/gorpipe/gor/table/UTestGorDictionaryTableVersioned.java new file mode 100644 index 00000000..85f7e558 --- /dev/null +++ b/model/src/test/java/org/gorpipe/gor/table/UTestGorDictionaryTableVersioned.java @@ -0,0 +1,147 @@ +/* + * BEGIN_COPYRIGHT + * + * Copyright (C) 2011-2013 deCODE genetics Inc. + * Copyright (C) 2013-2019 WuXi NextCode Inc. + * All Rights Reserved. + * + * GORpipe is free software: you can redistribute it and/or modify + * it under the terms of the AFFERO GNU General Public License as published by + * the Free Software Foundation. 
+ * + * GORpipe is distributed "AS-IS" AND WITHOUT ANY WARRANTY OF ANY KIND, + * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, + * NON-INFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. See + * the AFFERO GNU General Public License for the complete license terms. + * + * You should have received a copy of the AFFERO GNU General Public License + * along with GORpipe. If not, see + * + * END_COPYRIGHT + */ + +package org.gorpipe.gor.table; + +import org.apache.commons.io.FileUtils; +import org.gorpipe.gor.table.dictionary.gor.GorDictionaryTable; +import org.gorpipe.gor.table.util.PathUtils; +import org.gorpipe.test.utils.FileTestUtils; +import org.junit.*; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.gorpipe.gor.table.dictionary.gor.GorDictionaryTableMeta.HEADER_BUCKETIZE_KEY; + +/** + * Unit tests for gor table. + *

+ * Created by gisli on 03/01/16. + */ +public class UTestGorDictionaryTableVersioned { + + private static Path tableWorkDir; + private static String gort1; + + @Before + public void setUp() throws Exception { + tableWorkDir = Files.createTempDirectory("UnitTestGorTableWorkDir"); + + for (int i = 1; i < 25; i++) { + Files.createFile(tableWorkDir.resolve(String.format("filepath%d.gor", i))); + } + + gort1 = "filepath1.gor\n" + + "filepath2.gor\ttagA\n" + + "filepath3.gor\ttagB\n" + + "filepath4.gor\t\tchr1\t10000\tchr1\t30000\ttagD,tagE\n" + + "filepath5.gor\ttagF\tchr1\t10000\tchr1\t20000\t\n" + + "filepath6.gor\ttagF\tchr1\t30000\tchr2\t10000\t\n" + + "filepath7.gor\t\tchr3\t10000\tchr4\t10000\ttagF1,tagF2\n" + + "filepath8.gor\ttagA\n" + + "filepath9.gor|bucket1\ttagG\n" + + "filepath10.gor|bucket1\ttagH\n" + + "filepath11.gor|bucket2\ttagI\n" + + "filepath12.gor|bucket2\t\tchr1\t1\tchr2\t20000\ttagJ,tagK\n" + + "filepath13.gor|bucket2\n" + + "filepath14.gor|D|bucket2\ttagL\n" + + "filepath15.gor|D|bucket2\n" + + "filepath16.gor\ttagD\n" + + "filepath17.gor\ttagB\n" + + "filepath18.gor\t\t\t\t\t\ttagJ,tagM\n" + + "filepath19.gor\ttagK\n"; + } + + @AfterClass + public static void tearDown() throws Exception { + FileUtils.deleteDirectory(tableWorkDir.toFile()); + } + + @Test + public void testTableCreation() { + String tableName = "gortable_table_creation"; + Path gordFile = tableWorkDir.resolve(tableName + ".gord"); + + GorDictionaryTable dict = new GorDictionaryTable.Builder<>(PathUtils.markAsFolder(gordFile.toString())).build(); + dict.save(); + + Assert.assertEquals("Path check failed", gordFile.toAbsolutePath(), Path.of(dict.getFolderPath())); + + dict = new GorDictionaryTable.Builder<>(gordFile).build(); + Assert.assertEquals("Path check failed", gordFile.toAbsolutePath(), Path.of(dict.getFolderPath())); + + Assert.assertEquals(null, dict.getBooleanConfigTableProperty(HEADER_BUCKETIZE_KEY, null)); + } + + + @Test + public void testTableSaveLoad() throws 
IOException { + String tableName = "gortable_table_load"; + Path gordFile = tableWorkDir.resolve(tableName + ".gord"); + Files.createDirectory(gordFile); + Files.write(gordFile.resolve("version.v1.gord"), gort1.getBytes()); + Files.write(gordFile.resolve("thedict.gord.link"), "## VERSION = 1\nversion.v1.gord".getBytes()); + + GorDictionaryTable dict = new GorDictionaryTable.Builder<>(gordFile).build(); + dict.save(); + + Assert.assertNotEquals(gordFile.resolve("thedict.v1.gord"), dict.getPath()); + String savedContent = Files.readString(Path.of(dict.getPath())); + Assert.assertEquals("Content not loaded or saved correctly", gort1, savedContent); + } + + @Test + public void testRepeatedSaves() { + String tableName = "gortable_repeated_saves"; + Path gordFile = new File(tableWorkDir.toFile(), tableName + ".gord").toPath(); + + GorDictionaryTable dict = new GorDictionaryTable.Builder<>(gordFile).embeddedHeader(false).build(); + dict.save(); + dict.save(); + + GorDictionaryTable dict2 = new GorDictionaryTable.Builder<>(gordFile).embeddedHeader(false).build(); + dict2.reload(); + Assert.assertEquals("Dicts are different", dict.getEntries(), dict2.getEntries()); + } + + @Test + public void testCreateSimple() { + // Add one file. 
+ String tableName = "gortable_create_simple"; + String dataFileName = Paths.get("../tests/data/gor/genes.gor").toAbsolutePath().toString(); + GorDictionaryTable dict = new GorDictionaryTable.Builder<>( + PathUtils.markAsFolder(tableWorkDir.resolve(tableName + ".gord").toString())).build(); + + dict.insert(dataFileName); + dict.save(); + + Assert.assertTrue("BaseTable file was not created", Files.exists(Path.of(dict.getPath()))); + Assert.assertTrue("Link file was not created", Files.exists(Path.of(dict.getLinkPath()))); + + Assert.assertFalse("Logging dir should not be created", new File(tableWorkDir.toFile(), tableName + ".log").exists()); + Assert.assertArrayEquals("Columns def not correct", new String[]{"Chrom", "gene_start", "gene_end", "Gene_Symbol"}, dict.getColumns()); + } +} From 691a1088c1337f207c644088d8a7979fc036ae92 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sat, 10 Jan 2026 14:15:51 +0000 Subject: [PATCH 06/18] fix(ENGKNOW-2781): Fix invalid sec context. --- .github/workflows/build.yml | 4 ++-- gortools/src/main/scala/gorsat/Commands/Write.scala | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6b2aa47d..315b9af5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,8 +142,8 @@ jobs: files: '**/TEST-*.xml' publishSnapshot: - if: ${{ github.ref == 'refs/heads/main' }} - needs: [test, slowTest, integrationTest] + #if: ${{ github.ref == 'refs/heads/main' }} + #needs: [test, slowTest, integrationTest] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 738feca2..52c1d36c 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -33,13 +33,18 @@ import org.gorpipe.gor.driver.meta.{DataType, SourceReference} import 
org.gorpipe.gor.driver.providers.stream.sources.StreamSource import org.gorpipe.gor.session.GorContext import org.gorpipe.gor.util.DataUtil +import org.slf4j.{Logger, LoggerFactory} +object Write { + val log: Logger = LoggerFactory.getLogger(this.getClass) +} class Write extends CommandInfo("WRITE", CommandArguments("-r -c -m -inferschema -maxseg -noheader", "-d -f -i -t -l -tags -card -prefix -link -linkmeta", 0), CommandOptions(gorCommand = true, norCommand = true, verifyCommand = true)) { def parseBaseOptions(context: GorContext, iargs: Array[String], args: Array[String], executeNor: Boolean): (String, Option[String], Boolean) = { + Write.log.warn("Entering parseBaseOptions", new Exception("Getting stracktrace")) var fileName = replaceSingleQuotes(iargs.mkString(" ")) val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else "" From 698846dc03c5c512324bfb8068ba7f5da56ebe49 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sat, 10 Jan 2026 15:50:22 +0000 Subject: [PATCH 07/18] fix(ENGKNOW-2781): Fix invalid sec context. 
--- gortools/src/main/scala/gorsat/Commands/Write.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 52c1d36c..320c2a05 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -26,6 +26,7 @@ import java.util.zip.Deflater import gorsat.Analysis.{ForkWrite, OutputOptions} import gorsat.Commands.CommandParseUtilities._ import org.apache.commons.io.FilenameUtils +import org.gorpipe.base.security.{BundledCredentials, Credentials} import org.gorpipe.exceptions.{GorParsingException, GorResourceException} import org.gorpipe.gor.binsearch.GorIndexType import org.gorpipe.gor.driver.linkfile.LinkFileUtil @@ -35,6 +36,9 @@ import org.gorpipe.gor.session.GorContext import org.gorpipe.gor.util.DataUtil import org.slf4j.{Logger, LoggerFactory} +import java.util +import java.util.List + object Write { val log: Logger = LoggerFactory.getLogger(this.getClass) } @@ -44,7 +48,13 @@ class Write extends CommandInfo("WRITE", CommandOptions(gorCommand = true, norCommand = true, verifyCommand = true)) { def parseBaseOptions(context: GorContext, iargs: Array[String], args: Array[String], executeNor: Boolean): (String, Option[String], Boolean) = { - Write.log.warn("Entering parseBaseOptions", new Exception("Getting stracktrace")) + Write.log.warn("Entering parseBaseOptions ", new Exception("Getting stracktrace")) + val creds: BundledCredentials = BundledCredentials.fromSecurityContext(context.getSession.getProjectContext.getFileReader.getSecurityContext) + val cred: util.List[Credentials] = creds.getCredentials("s3", "clinops-reference-data") + if (cred == null || cred.isEmpty) Write.log.warn("pbo No S3 credentials found in security") + else Write.log.info("pbo Found S3 credentials, stuff: {}", cred.get(0).getLookupKey) + + var fileName = replaceSingleQuotes(iargs.mkString(" ")) 
val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else "" From 6a5b49be7381ab3ea21deef951950424f7a2b909 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sat, 10 Jan 2026 16:07:41 +0000 Subject: [PATCH 08/18] fix(ENGKNOW-2781): Fix invalid sec context. --- gortools/src/main/scala/gorsat/Commands/Write.scala | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 320c2a05..2dbab9cc 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -48,13 +48,6 @@ class Write extends CommandInfo("WRITE", CommandOptions(gorCommand = true, norCommand = true, verifyCommand = true)) { def parseBaseOptions(context: GorContext, iargs: Array[String], args: Array[String], executeNor: Boolean): (String, Option[String], Boolean) = { - Write.log.warn("Entering parseBaseOptions ", new Exception("Getting stracktrace")) - val creds: BundledCredentials = BundledCredentials.fromSecurityContext(context.getSession.getProjectContext.getFileReader.getSecurityContext) - val cred: util.List[Credentials] = creds.getCredentials("s3", "clinops-reference-data") - if (cred == null || cred.isEmpty) Write.log.warn("pbo No S3 credentials found in security") - else Write.log.info("pbo Found S3 credentials, stuff: {}", cred.get(0).getLookupKey) - - var fileName = replaceSingleQuotes(iargs.mkString(" ")) val linkOpt = if (hasOption(args, "-link")) stringValueOfOption(args, "-link") else "" @@ -62,7 +55,9 @@ class Write extends CommandInfo("WRITE", fileName = if (fileName.isEmpty && linkOpt.nonEmpty) { val linkMetaInfo = LinkFileUtil.extractLinkMetaInfo(linkMetaOpt) - val linkSourceRef = new SourceReference(linkOpt, null, context.getSession.getProjectContext.getFileReader.getCommonRoot, null, null, true); + val linkSourceRef = new SourceReference(linkOpt, + 
context.getSession.getProjectContext.getFileReader.getSecurityContext, + context.getSession.getProjectContext.getFileReader.getCommonRoot, null, null, true); // Infer the full file name from the link (and defautl locations) LinkFileUtil.inferDataFileNameFromLinkFile( context.getSession.getProjectContext.getFileReader.resolveDataSource(linkSourceRef).asInstanceOf[StreamSource], linkMetaInfo.linkFileMeta); From 3bca7fe1076709ded6a0e0ad3aadd186f77ea7a2 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sat, 10 Jan 2026 16:21:01 +0000 Subject: [PATCH 09/18] fix(ENGKNOW-2781): Fix invalid sec context. --- .github/workflows/build.yml | 4 ++-- gortools/src/main/scala/gorsat/Commands/Write.scala | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 315b9af5..6b2aa47d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,8 +142,8 @@ jobs: files: '**/TEST-*.xml' publishSnapshot: - #if: ${{ github.ref == 'refs/heads/main' }} - #needs: [test, slowTest, integrationTest] + if: ${{ github.ref == 'refs/heads/main' }} + needs: [test, slowTest, integrationTest] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 2dbab9cc..0e27ee96 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -26,7 +26,6 @@ import java.util.zip.Deflater import gorsat.Analysis.{ForkWrite, OutputOptions} import gorsat.Commands.CommandParseUtilities._ import org.apache.commons.io.FilenameUtils -import org.gorpipe.base.security.{BundledCredentials, Credentials} import org.gorpipe.exceptions.{GorParsingException, GorResourceException} import org.gorpipe.gor.binsearch.GorIndexType import org.gorpipe.gor.driver.linkfile.LinkFileUtil @@ -36,9 +35,6 @@ import org.gorpipe.gor.session.GorContext import 
org.gorpipe.gor.util.DataUtil import org.slf4j.{Logger, LoggerFactory} -import java.util -import java.util.List - object Write { val log: Logger = LoggerFactory.getLogger(this.getClass) } From 0ff2a7a4cc5c2550f744d95ca9eb7fc8e373bb5f Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 11 Jan 2026 02:35:42 +0000 Subject: [PATCH 10/18] fix(ENGKNOW-2781): Fix invalid sec context. --- .github/workflows/build.yml | 4 ++-- .../gorpipe/gor/driver/linkfile/LinkFileUtil.java | 8 +++++--- .../java/org/gorpipe/gor/table/util/PathUtils.java | 4 ++-- .../gorpipe/gor/driver/linkfile/LinkFileTest.java | 14 +++++++------- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6b2aa47d..315b9af5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,8 +142,8 @@ jobs: files: '**/TEST-*.xml' publishSnapshot: - if: ${{ github.ref == 'refs/heads/main' }} - needs: [test, slowTest, integrationTest] + #if: ${{ github.ref == 'refs/heads/main' }} + #needs: [test, slowTest, integrationTest] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java index f8fdb3ea..80f8950d 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java @@ -38,7 +38,7 @@ public static String inferDataFileNameFromLinkFile(StreamSource linkSource, Stri if (!Strings.isNullOrEmpty(linkDataFileParentPath)) { dataFileParentPath = linkDataFileParentPath; } else if (link.getLatestEntry() != null) { - dataFileParentPath = PathUtils.getParent(link.getLatestEntryUrl()); + dataFileParentPath = PathUtils.getParent(PathUtils.getParent(link.getLatestEntryUrl())); } if (!Strings.isNullOrEmpty(linkDataFileParentPath)) { @@ -59,9 +59,11 @@ public static String 
inferDataFileNameFromLinkFile(StreamSource linkSource, Stri } } - var dataFileName = PathUtils.injectRandomStringIntoFileName(PathUtils.getFileName(linkSource.getFullPath())); + var fileName = PathUtils.getFileName(linkSource.getFullPath()); + var extraFolder = PathUtils.removeExtensions(fileName); + var uniqueFileName = PathUtils.injectRandomStringIntoFileName(fileName); - return PathUtils.resolve(dataFileParentPath, dataFileName); + return PathUtils.resolve(PathUtils.resolve(dataFileParentPath, extraFolder), uniqueFileName); } private static Pattern linkPattern = Pattern.compile(".* -link ([^\\s]*) ?.*", Pattern.CASE_INSENSITIVE); diff --git a/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java b/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java index daf9ebea..5259e490 100644 --- a/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java +++ b/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java @@ -352,12 +352,12 @@ public static String injectRandomStringIntoFileName(String fileName) { String uniqId = RandomStringUtils.insecure().next(8, true, true); var linkPathSplit = fileName.indexOf('.', fileName.indexOf("/")); if (linkPathSplit > 0) { - tempFileName = "%s.%s.%s".formatted( + tempFileName = "%s_%s.%s".formatted( fileName.substring(0, linkPathSplit), uniqId, fileName.substring(linkPathSplit + 1)); } else { - tempFileName = "%s.%s".formatted(fileName, uniqId); + tempFileName = "%s_%s".formatted(fileName, uniqId); } return tempFileName.replaceAll("\\.link$", ""); diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java index dd2191d1..e755e3cd 100644 --- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java @@ -201,7 +201,7 @@ public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithProject() thro String result = 
LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference("x.gor.link", null, "/projects/test", -1, null, null, false, false)), null); assertNotNull(result); - assertTrue(result.matches((root + "/test/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/test/x/x\\..*\\.gor").replace("/", "\\/"))); } @Test @@ -210,7 +210,7 @@ public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithOutProject() t environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("x.gor.link"), null); - assertTrue(result.matches((root + "/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/x/x\\..*\\.gor").replace("/", "\\/"))); } @Test @@ -218,11 +218,11 @@ public void testInferDataFileNameFromLinkFile_FromExiting_File() throws Exceptio String root = "/managed/fromfile"; String linkFilePath = "x.gor.link"; Files.createDirectory(workPath.resolve("test")); - Files.writeString(workPath.resolve("test").resolve(linkFilePath), root + "/source/y.gorz\n"); + Files.writeString(workPath.resolve("test").resolve(linkFilePath), root + "/source/y/y.gorz\n"); String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath, null, workPath.resolve("test").toString(), -1, null, null, false, false)), null); assertNotNull(result); - assertTrue(result.matches((root + "/source/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/source/x/x\\..*\\.gor").replace("/", "\\/"))); } @Test @@ -233,7 +233,7 @@ public void testInferDataFileNameFromLinkFile_FromMetaParam() throws Exception { String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); assertNotNull(result); - assertTrue(result.matches((root + "/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/x/x\\..*\\.gor").replace("/", 
"\\/"))); } @Test @@ -247,7 +247,7 @@ public void testInferDataFileNameFromLinkFile_FromMetaParam_ExistingFile() throw String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); assertNotNull(result); - assertTrue(result.matches((paramroot + "/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((paramroot + "/x/x\\..*\\.gor").replace("/", "\\/"))); } @Test @@ -258,6 +258,6 @@ public void testInferDataFileNameFromLinkFile_AbsolutePath() throws Exception { String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.gor.link"), null); assertNotNull(result); - assertTrue(result.matches((root + "/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/x/x\\..*\\.gor").replace("/", "\\/"))); } } From cd8b1d4ca7d5347dc2f03d3781a1f019bb9ada2a Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 11 Jan 2026 17:25:17 +0000 Subject: [PATCH 11/18] fix(ENGKNOW-2781): Tweeking link cache paths. --- .../java/gorsat/process/GorJavaUtilities.java | 19 +++++++++++++++---- .../gorpipe/gor/driver/linkfile/LinkFile.java | 4 ++++ .../gor/driver/linkfile/LinkFileUtil.java | 6 +++++- .../org/gorpipe/gor/table/util/PathUtils.java | 9 ++++++--- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/gortools/src/main/java/gorsat/process/GorJavaUtilities.java b/gortools/src/main/java/gorsat/process/GorJavaUtilities.java index 4f0b392a..20ac4771 100644 --- a/gortools/src/main/java/gorsat/process/GorJavaUtilities.java +++ b/gortools/src/main/java/gorsat/process/GorJavaUtilities.java @@ -430,19 +430,30 @@ public static void createSymbolicLinkSafe(Path resultPath, Path cachePath) throw * to the filecache integrity, but as the data pointed to might be on external source (s3, http) * and we might not have access to data (to check timestamps) in the FileCache. 
*/ - public static String verifyLinkFileLastModified(ProjectContext projectContext, String cacheFile) { + public static String verifyLinkFileLastModified2(ProjectContext projectContext, String cacheFile) { if (cacheFile != null && DataUtil.isLink(cacheFile)) { + var invalidCacheFile = false; try { var ds = projectContext.getFileReader().resolveUrl(cacheFile); var linkLastModified = ds.getSourceMetadata().getLinkLastModified(); var lastModified = ds.getSourceMetadata().getLastModified(); if (linkLastModified != null && lastModified > linkLastModified) { - // Delete the link file (from the cache). + // Outdated link file. + invalidCacheFile = true; + } + } catch (Exception e) { + // Can not resolve the file or other errors. + invalidCacheFile = true; + } + + if (invalidCacheFile) { + log.debug("Link file {} is out of date and will be re-created.", cacheFile); + try { Files.delete(Paths.get(cacheFile)); cacheFile = null; + } catch (IOException ioException) { + // Ignore } - } catch (IOException e) { - // Ignore } } return cacheFile; diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index fa79dbd5..ef96d488 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -123,6 +123,10 @@ public LinkFileMeta getMeta() { return meta; } + public int getSerial() { + return meta.getPropertyInt(LinkFileMeta.HEADER_SERIAL_KEY, 0); + } + public String getPath() { return source.getFullPath(); } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java index 80f8950d..507f1ca6 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java @@ -18,6 +18,10 @@ public class LinkFileUtil { /** * Infer the data 
file name from the link file name. * + * Notes: The path returned must be idempotent as this is called + * from multiple different places in the code (meaning we + * can not use random or time in the path). + * * @param linkSource the link file path with the link extension * @param linkFileMeta additional link file meta data * @return the data file path @@ -61,7 +65,7 @@ public static String inferDataFileNameFromLinkFile(StreamSource linkSource, Stri var fileName = PathUtils.getFileName(linkSource.getFullPath()); var extraFolder = PathUtils.removeExtensions(fileName); - var uniqueFileName = PathUtils.injectRandomStringIntoFileName(fileName); + var uniqueFileName = PathUtils.injectStringIntoFileName(fileName, Integer.toString(link.getSerial() + 1)); return PathUtils.resolve(PathUtils.resolve(dataFileParentPath, extraFolder), uniqueFileName); } diff --git a/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java b/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java index 5259e490..fc625756 100644 --- a/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java +++ b/model/src/main/java/org/gorpipe/gor/table/util/PathUtils.java @@ -348,16 +348,19 @@ public static Path getTempFilePath(Path filePath) { } public static String injectRandomStringIntoFileName(String fileName) { + return injectStringIntoFileName(fileName, RandomStringUtils.insecure().next(8, true, true)); + } + + public static String injectStringIntoFileName(String fileName, String injectString) { var tempFileName = ""; - String uniqId = RandomStringUtils.insecure().next(8, true, true); var linkPathSplit = fileName.indexOf('.', fileName.indexOf("/")); if (linkPathSplit > 0) { tempFileName = "%s_%s.%s".formatted( fileName.substring(0, linkPathSplit), - uniqId, + injectString, fileName.substring(linkPathSplit + 1)); } else { - tempFileName = "%s_%s".formatted(fileName, uniqId); + tempFileName = "%s_%s".formatted(fileName, injectString); } return tempFileName.replaceAll("\\.link$", ""); From 
5296f58fa5e465a2f86706fa79d8e1f2f1e5fad2 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 11 Jan 2026 17:33:55 +0000 Subject: [PATCH 12/18] fix(ENGKNOW-2781): Tweaking link cache paths. --- gortools/src/main/java/gorsat/process/GorJavaUtilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gortools/src/main/java/gorsat/process/GorJavaUtilities.java b/gortools/src/main/java/gorsat/process/GorJavaUtilities.java index 20ac4771..6f7d8365 100644 --- a/gortools/src/main/java/gorsat/process/GorJavaUtilities.java +++ b/gortools/src/main/java/gorsat/process/GorJavaUtilities.java @@ -430,7 +430,7 @@ public static void createSymbolicLinkSafe(Path resultPath, Path cachePath) throw * to the filecache integrity, but as the data pointed to might be on external source (s3, http) * and we might not have access to data (to check timestamps) in the FileCache. */ - public static String verifyLinkFileLastModified2(ProjectContext projectContext, String cacheFile) { + public static String verifyLinkFileLastModified(ProjectContext projectContext, String cacheFile) { if (cacheFile != null && DataUtil.isLink(cacheFile)) { var invalidCacheFile = false; try { From a0e21dd5f62e043f5d3f92f37743ce30c36ced33 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 11 Jan 2026 18:54:27 +0000 Subject: [PATCH 13/18] fix(ENGKNOW-2781): Tweaking link cache paths.
--- gortools/src/main/scala/gorsat/Commands/Write.scala | 2 +- gortools/src/test/java/gorsat/UTestGorWrite.java | 4 ++-- .../src/main/java/org/gorpipe/gor/util/DataUtil.java | 2 +- .../gorpipe/gor/driver/linkfile/LinkFileTest.java | 12 ++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 0e27ee96..8ed758cf 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -51,7 +51,7 @@ class Write extends CommandInfo("WRITE", fileName = if (fileName.isEmpty && linkOpt.nonEmpty) { val linkMetaInfo = LinkFileUtil.extractLinkMetaInfo(linkMetaOpt) - val linkSourceRef = new SourceReference(linkOpt, + val linkSourceRef = new SourceReference(DataUtil.toLink(linkOpt), context.getSession.getProjectContext.getFileReader.getSecurityContext, context.getSession.getProjectContext.getFileReader.getCommonRoot, null, null, true); // Infer the full file name from the link (and defautl locations) diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index f0be3484..fea6684a 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -311,7 +311,7 @@ public void testWriteLinkFileForGordFolderInferFilename() throws IOException { var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); Assert.assertEquals(1, linkFile.getEntriesCount()); - Assert.assertTrue(linkFile.getLatestEntry().url().matches(".*?dbsnp3\\..*?\\.gord/")); + Assert.assertTrue(linkFile.getLatestEntry().url().matches(".*?dbsnp3_.*?\\.gord/")); String linkresult1 = TestUtils.runGorPipe("gor dbsnp.gor| top 1000", "-gorroot", workDirPath.toString()); String linkresult3 = TestUtils.runGorPipe("gor dbsnp3.gord | top 1000", "-gorroot", workDirPath.toString()); @@ -327,7 +327,7 
@@ public void testWriteLinkFileForGordFolderInferFilenameParallel() throws IOExcep var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); Assert.assertEquals(1, linkFile.getEntriesCount()); - Assert.assertTrue(linkFile.getLatestEntry().url().matches(".*?dbsnp3\\..*?\\.gord/")); + Assert.assertTrue(linkFile.getLatestEntry().url().matches(".*?dbsnp3_.*?\\.gord/")); String linkresult1 = TestUtils.runGorPipe("gor dbsnp.gor | top 500", "-gorroot", workDirPath.toString()); String linkresult3 = TestUtils.runGorPipe("gor dbsnp3.gord | top 500", "-gorroot", workDirPath.toString()); diff --git a/model/src/main/java/org/gorpipe/gor/util/DataUtil.java b/model/src/main/java/org/gorpipe/gor/util/DataUtil.java index abbffb24..4d2211a0 100644 --- a/model/src/main/java/org/gorpipe/gor/util/DataUtil.java +++ b/model/src/main/java/org/gorpipe/gor/util/DataUtil.java @@ -126,7 +126,7 @@ public static String toLinkFile(String name, DataType type) { } public static String toLink(String path) { - return PathUtils.stripTrailingSlash(path) + DataType.LINK.suffix; + return DataUtil.isLink(path) ? 
path : PathUtils.stripTrailingSlash(path) + DataType.LINK.suffix; } public static String toVersionedLink(String path) { diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java index e755e3cd..a8f447d4 100644 --- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java @@ -201,7 +201,7 @@ public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithProject() thro String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference("x.gor.link", null, "/projects/test", -1, null, null, false, false)), null); assertNotNull(result); - assertTrue(result.matches((root + "/test/x/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/test/x/x_.*\\.gor").replace("/", "\\/"))); } @Test @@ -210,7 +210,7 @@ public void testInferDataFileNameFromLinkFile_FromEnvVariable_WithOutProject() t environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, root); String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("x.gor.link"), null); - assertTrue(result.matches((root + "/x/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/x/x_.*\\.gor").replace("/", "\\/"))); } @Test @@ -222,7 +222,7 @@ public void testInferDataFileNameFromLinkFile_FromExiting_File() throws Exceptio String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath, null, workPath.resolve("test").toString(), -1, null, null, false, false)), null); assertNotNull(result); - assertTrue(result.matches((root + "/source/x/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/source/x/x_.*\\.gor").replace("/", "\\/"))); } @Test @@ -233,7 +233,7 @@ public void testInferDataFileNameFromLinkFile_FromMetaParam() throws Exception { String result = 
LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); assertNotNull(result); - assertTrue(result.matches((root + "/x/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/x/x_..*\\.gor").replace("/", "\\/"))); } @Test @@ -247,7 +247,7 @@ public void testInferDataFileNameFromLinkFile_FromMetaParam_ExistingFile() throw String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource(new SourceReference(linkFilePath)), linkFileMeta); assertNotNull(result); - assertTrue(result.matches((paramroot + "/x/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((paramroot + "/x/x_.*\\.gor").replace("/", "\\/"))); } @Test @@ -258,6 +258,6 @@ public void testInferDataFileNameFromLinkFile_AbsolutePath() throws Exception { String result = LinkFileUtil.inferDataFileNameFromLinkFile(new FileSource("/abs/path/x.gor.link"), null); assertNotNull(result); - assertTrue(result.matches((root + "/x/x\\..*\\.gor").replace("/", "\\/"))); + assertTrue(result.matches((root + "/x/x_.*\\.gor").replace("/", "\\/"))); } } From e21624cd1cca2c35b98d89b77d17929fb4293ce7 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Sun, 11 Jan 2026 23:47:21 +0000 Subject: [PATCH 14/18] fix(ENGKNOW-2781): Tweeking link cache paths. 
--- .../java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java index 507f1ca6..39c0b613 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java @@ -8,6 +8,7 @@ import org.gorpipe.gor.model.FileReader; import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.util.Strings; +import org.slf4j.Logger; import java.io.IOException; import java.util.regex.Matcher; @@ -15,6 +16,8 @@ public class LinkFileUtil { + private static Logger log = org.slf4j.LoggerFactory.getLogger(LinkFileUtil.class); + /** * Infer the data file name from the link file name. * @@ -67,6 +70,9 @@ public static String inferDataFileNameFromLinkFile(StreamSource linkSource, Stri var extraFolder = PathUtils.removeExtensions(fileName); var uniqueFileName = PathUtils.injectStringIntoFileName(fileName, Integer.toString(link.getSerial() + 1)); + log.warn("Inferred file name for link file {} is {}", linkSource.getFullPath(), + PathUtils.resolve(PathUtils.resolve(dataFileParentPath, extraFolder), uniqueFileName)); + return PathUtils.resolve(PathUtils.resolve(dataFileParentPath, extraFolder), uniqueFileName); } From 2531999fd2911b81bd37608c9649076122cdfc01 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Mon, 12 Jan 2026 01:30:21 +0000 Subject: [PATCH 15/18] fix(ENGKNOW-2781): Tweeking link cache paths. 
--- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 315b9af5..5a4f854d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -143,7 +143,7 @@ jobs: publishSnapshot: #if: ${{ github.ref == 'refs/heads/main' }} - #needs: [test, slowTest, integrationTest] + # needs: [test, slowTest, integrationTest] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 From 93fd06297b1727d85b8ef5637e38066f7837e381 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Mon, 12 Jan 2026 03:17:31 +0000 Subject: [PATCH 16/18] fix(ENGKNOW-2781): Tweaking link cache paths. --- .../main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala index cc4d4f7b..9e0898dd 100644 --- a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala +++ b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala @@ -148,7 +148,7 @@ class GeneralQueryHandler(context: GorContext, header: Boolean) extends GorParal } else { generateDictionaryFile(commandToExecute, fileRoot, fileReader, useMd5, cacheFile) nested.cached(cacheFile) - cacheFile + "" //cacheFile } } catch { case gue: GorUserException => From 3fe36a72d797966a6753d685f082b41a252eeb21 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Mon, 12 Jan 2026 03:31:19 +0000 Subject: [PATCH 17/18] fix(ENGKNOW-2781): Tweaking link cache paths.
--- .../main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala index 9e0898dd..cc4d4f7b 100644 --- a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala +++ b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala @@ -148,7 +148,7 @@ class GeneralQueryHandler(context: GorContext, header: Boolean) extends GorParal } else { generateDictionaryFile(commandToExecute, fileRoot, fileReader, useMd5, cacheFile) nested.cached(cacheFile) - "" //cacheFile + cacheFile } } catch { case gue: GorUserException => From 6be2d8d8fb40a5fef0be44ac6f4552e055d2bb62 Mon Sep 17 00:00:00 2001 From: Gisli Magnusson Date: Mon, 12 Jan 2026 11:48:21 +0000 Subject: [PATCH 18/18] fix(ENGKNOW-2781): Tweaking link cache paths. --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5a4f854d..6b2aa47d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,8 +142,8 @@ jobs: files: '**/TEST-*.xml' publishSnapshot: - #if: ${{ github.ref == 'refs/heads/main' }} - # needs: [test, slowTest, integrationTest] + if: ${{ github.ref == 'refs/heads/main' }} + needs: [test, slowTest, integrationTest] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4