From ff4cd66c81a3df4f1d43796384fe6ee777772979 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Thu, 16 Apr 2026 14:00:58 -0400 Subject: [PATCH 1/6] Add optional gawk-style arrays of arrays - Add allowArraysOfArrays to AwkSettings (default: true) - Thread the flag into Awk.compile() and Awk.compileExpression() - Update AwkParser to parse chained-bracket syntax a[i][j] when enabled - Introduce new opcodes for nested array lvalue operations - Extend AVM to execute nested-array tuples at runtime - Cover reads, writes, in, for-in, delete, ++/--, compound assignment - Add regression tests in AwkTest and AwkParserTest - Update documentation (README, compatibility.md, java.md, java-compile.md) Closes #438 --- src/main/java/io/jawk/Awk.java | 4 +- src/main/java/io/jawk/backend/AVM.java | 217 +++++++++- src/main/java/io/jawk/frontend/AwkParser.java | 383 +++++++++++++----- .../java/io/jawk/intermediate/AwkTuples.java | 89 ++++ .../java/io/jawk/intermediate/Opcode.java | 101 +++++ src/main/java/io/jawk/util/AwkSettings.java | 35 ++ src/site/markdown/compatibility.md | 1 + src/site/markdown/java-compile.md | 10 + src/site/markdown/java.md | 10 + src/test/java/io/jawk/AwkParserTest.java | 11 + src/test/java/io/jawk/AwkTest.java | 117 ++++++ .../java/io/jawk/PosixConformanceTest.java | 3 - 12 files changed, 862 insertions(+), 119 deletions(-) diff --git a/src/main/java/io/jawk/Awk.java b/src/main/java/io/jawk/Awk.java index 0ae77728..3bcb3084 100644 --- a/src/main/java/io/jawk/Awk.java +++ b/src/main/java/io/jawk/Awk.java @@ -472,7 +472,7 @@ protected final T compileProgram( lastAst = null; if (!scripts.isEmpty()) { // Parse all script sources into a single AST - AwkParser parser = new AwkParser(this.extensionFunctions); + AwkParser parser = new AwkParser(this.extensionFunctions, settings.isAllowArraysOfArrays()); AstNode ast = parser.parse(scripts); lastAst = ast; if (ast != null) { @@ -539,7 +539,7 @@ protected final T compileExpression( new StringReader(expression)); // 
Parse the expression - AwkParser parser = new AwkParser(this.extensionFunctions); + AwkParser parser = new AwkParser(this.extensionFunctions, settings.isAllowArraysOfArrays()); AstNode ast = parser.parseExpression(expressionSource); // Attempt to traverse the syntax tree and build diff --git a/src/main/java/io/jawk/backend/AVM.java b/src/main/java/io/jawk/backend/AVM.java index d183c1d7..838fe495 100644 --- a/src/main/java/io/jawk/backend/AVM.java +++ b/src/main/java/io/jawk/backend/AVM.java @@ -649,7 +649,12 @@ private void executeTuples(PositionTracker position) // display $0 push(jrt.jrtGetInputField(0).toString().length()); } else { - push(pop().toString().length()); + Object value = pop(); + if (value instanceof Map) { + push((long) ((Map) value).size()); + } else { + push(jrt.toAwkString(value).length()); + } } position.next(); break; @@ -804,6 +809,20 @@ private void executeTuples(PositionTracker position) position.next(); break; } + case ASSIGN_MAP_ELEMENT: { + // stack[0] = array index + // stack[1] = associative array + // stack[2] = value + Object arrIdx = pop(); + Map array = toMap(pop()); + Object rhs = pop(); + if (rhs == null) { + rhs = BLANK; + } + assignMapElement(array, arrIdx, rhs); + position.next(); + break; + } case PLUS_EQ_ARRAY: case MINUS_EQ_ARRAY: case MULT_EQ_ARRAY: @@ -825,6 +844,7 @@ private void executeTuples(PositionTracker position) double val = JRT.toDouble(rhs); Map array = ensureMapVariable(offset, isGlobal); + checkScalar(arrIdx); Object o = array.get(arrIdx); double origVal = JRT.toDouble(o); @@ -861,6 +881,59 @@ private void executeTuples(PositionTracker position) position.next(); break; } + case PLUS_EQ_MAP_ELEMENT: + case MINUS_EQ_MAP_ELEMENT: + case MULT_EQ_MAP_ELEMENT: + case DIV_EQ_MAP_ELEMENT: + case MOD_EQ_MAP_ELEMENT: + case POW_EQ_MAP_ELEMENT: { + // stack[0] = array index + // stack[1] = associative array + // stack[2] = value + Object arrIdx = pop(); + Map array = toMap(pop()); + Object rhs = pop(); + if (rhs == 
null) { + rhs = BLANK; + } + + double val = JRT.toDouble(rhs); + checkScalar(arrIdx); + Object o = array.get(arrIdx); + double origVal = JRT.toDouble(o); + double newVal; + + switch (opcode) { + case PLUS_EQ_MAP_ELEMENT: + newVal = origVal + val; + break; + case MINUS_EQ_MAP_ELEMENT: + newVal = origVal - val; + break; + case MULT_EQ_MAP_ELEMENT: + newVal = origVal * val; + break; + case DIV_EQ_MAP_ELEMENT: + newVal = origVal / val; + break; + case MOD_EQ_MAP_ELEMENT: + newVal = origVal % val; + break; + case POW_EQ_MAP_ELEMENT: + newVal = Math.pow(origVal, val); + break; + default: + throw new Error("Invalid op code here: " + opcode); + } + + if (JRT.isActuallyLong(newVal)) { + assignMapElement(array, arrIdx, (long) Math.rint(newVal)); + } else { + assignMapElement(array, arrIdx, newVal); + } + position.next(); + break; + } case ASSIGN_AS_INPUT: { // stack[0] = value @@ -1020,6 +1093,7 @@ private void executeTuples(PositionTracker position) boolean isGlobal = position.boolArg(1); Map aa = ensureMapVariable(position.intArg(0), isGlobal); Object key = pop(); + checkScalar(key); Object o = aa.get(key); double ans = JRT.toDouble(o) + 1; if (JRT.isActuallyLong(ans)) { @@ -1037,6 +1111,39 @@ private void executeTuples(PositionTracker position) boolean isGlobal = position.boolArg(1); Map aa = ensureMapVariable(position.intArg(0), isGlobal); Object key = pop(); + checkScalar(key); + Object o = aa.get(key); + double ans = JRT.toDouble(o) - 1; + if (JRT.isActuallyLong(ans)) { + aa.put(key, (long) Math.rint(ans)); + } else { + aa.put(key, ans); + } + position.next(); + break; + } + case INC_MAP_REF: { + // stack[0] = array index + // stack[1] = associative array + Object key = pop(); + checkScalar(key); + Map aa = toMap(pop()); + Object o = aa.get(key); + double ans = JRT.toDouble(o) + 1; + if (JRT.isActuallyLong(ans)) { + aa.put(key, (long) Math.rint(ans)); + } else { + aa.put(key, ans); + } + position.next(); + break; + } + case DEC_MAP_REF: { + // stack[0] = array index + 
// stack[1] = associative array + Object key = pop(); + checkScalar(key); + Map aa = toMap(pop()); Object o = aa.get(key); double ans = JRT.toDouble(o) - 1; if (JRT.isActuallyLong(ans)) { @@ -1105,17 +1212,22 @@ private void executeTuples(PositionTracker position) case DEREF_ARRAY: { // stack[0] = array index Object idx = pop(); // idx - Object array = pop(); // map - if (!(array instanceof Map)) { - throw new AwkRuntimeException("Attempting to index a non-associative-array."); - } - @SuppressWarnings("unchecked") - Map map = (Map) array; + checkScalar(idx); + Map map = toMap(pop()); Object o = JRT.getAwkValue(map, idx); push(o); position.next(); break; } + case ENSURE_ARRAY_ELEMENT: { + // stack[0] = array index + // stack[1] = associative array + Object idx = pop(); + Map map = toMap(pop()); + push(ensureArrayInArray(map, idx)); + position.next(); + break; + } case SRAND: { // arg[0] = numArgs (where 0 = no args, anything else = one argument) // stack[0] = seed (only if numArgs != 0) @@ -1322,6 +1434,21 @@ private void executeTuples(PositionTracker position) position.next(); break; } + case SUB_FOR_MAP_REFERENCE: { + // arg[0] = isGsub + // stack[0] = array index + // stack[1] = associative array + // stack[2] = original variable value + // stack[3] = replacement string + // stack[4] = ere + Object arrIdx = pop(); + Map array = toMap(pop()); + String newString = execSubOrGSub(position, 0); + assignMapElement(array, arrIdx, newString); + pop(); + position.next(); + break; + } case SPLIT: { // arg[0] = num args // stack[0] = field_sep (only if num args == 3) @@ -1849,14 +1976,20 @@ private void executeTuples(PositionTracker position) long count = position.intArg(0); // String s; if (count == 1) { - push(jrt.toAwkString(pop())); + Object value = pop(); + checkScalar(value); + push(jrt.toAwkString(value)); } else { StringBuilder sb = new StringBuilder(); - sb.append(jrt.toAwkString(pop())); + Object value = pop(); + checkScalar(value); + 
sb.append(jrt.toAwkString(value)); String subsep = jrt.toAwkString(jrt.getSUBSEPVar()); for (int i = 1; i < count; i++) { sb.insert(0, subsep); - sb.insert(0, jrt.toAwkString(pop())); + value = pop(); + checkScalar(value); + sb.insert(0, jrt.toAwkString(value)); } push(sb.toString()); } @@ -1871,12 +2004,23 @@ private void executeTuples(PositionTracker position) boolean isGlobal = position.boolArg(1); Map aa = getMapVariable(offset, isGlobal); Object key = pop(); + checkScalar(key); if (aa != null) { aa.remove(key); } position.next(); break; } + case DELETE_MAP_ELEMENT: { + // stack[0] = array index + // stack[1] = associative array + Object key = pop(); + checkScalar(key); + Map aa = toMap(pop()); + aa.remove(key); + position.next(); + break; + } case DELETE_ARRAY: { // arg[0] = offset // arg[1] = isGlobal @@ -1954,6 +2098,7 @@ private void executeTuples(PositionTracker position) // stack[1] = key to check Object arr = pop(); Object arg = pop(); + checkScalar(arg); if (!(arr instanceof Map)) { throw new AwkRuntimeException("Attempting to test membership on a non-associative-array."); } @@ -2378,7 +2523,11 @@ private void assign(long l, Object value, boolean isGlobal, PositionTracker posi * Awk array element assignment functionality. */ private void assignArray(long offset, Object arrIdx, Object rhs, boolean isGlobal) { - Map array = ensureMapVariable(offset, isGlobal); + assignMapElement(ensureMapVariable(offset, isGlobal), arrIdx, rhs); + } + + private void assignMapElement(Map array, Object arrIdx, Object rhs) { + checkScalar(arrIdx); array.put(arrIdx, rhs); push(rhs); } @@ -2616,6 +2765,13 @@ private Map getMapVariable(long offset, boolean isGlobal) { return toMap(value); } + /** + * Casts an AWK value to an associative array. 
+ * + * @param value value to validate + * @return the associative array value + * @throws AwkRuntimeException when {@code value} is scalar + */ private Map toMap(Object value) { if (!(value instanceof Map)) { throw new AwkRuntimeException("Attempting to treat a scalar as an array."); @@ -2625,6 +2781,45 @@ private Map toMap(Object value) { return map; } + /** + * Ensures a value is scalar before using it in a scalar-only context such as + * a subscript component. + * + * @param value value to validate + * @throws AwkRuntimeException when {@code value} is an array + */ + private void checkScalar(Object value) { + if (value instanceof Map) { + throw new AwkRuntimeException("Attempting to use an array in a scalar context."); + } + } + + /** + * Returns the nested associative array stored in {@code map[key]}, creating it + * when the key is undefined. + * + * @param map containing array + * @param key nested-array key + * @return the nested associative array stored at {@code key} + * @throws AwkRuntimeException when {@code key} is itself an array or when + * the existing slot contains a scalar + */ + private Map ensureArrayInArray(Map map, Object key) { + checkScalar(key); + Object value = JRT.getAwkValue(map, key); + if (value == null || value.equals(BLANK) || value instanceof UninitializedObject) { + Map nested = newAwkArray(); + map.put(key, nested); + return nested; + } + if (!(value instanceof Map)) { + throw new AwkRuntimeException("Attempting to use a scalar as an array."); + } + @SuppressWarnings("unchecked") + Map nested = (Map) value; + return nested; + } + private static final UninitializedObject BLANK = new UninitializedObject(); private static final class SingleRecordInputSource implements InputSource { diff --git a/src/main/java/io/jawk/frontend/AwkParser.java b/src/main/java/io/jawk/frontend/AwkParser.java index 3b0c1f4f..c22a552e 100644 --- a/src/main/java/io/jawk/frontend/AwkParser.java +++ b/src/main/java/io/jawk/frontend/AwkParser.java @@ -267,6 
+267,7 @@ enum Token { private final AwkSymbolTableImpl symbolTable = new AwkSymbolTableImpl(); private final Map extensions; + private final boolean allowArraysOfArrays; /** *

@@ -275,8 +276,9 @@ enum Token { * * @param extensions a {@link java.util.Map} object */ - public AwkParser(Map extensions) { + public AwkParser(Map extensions, boolean allowArraysOfArrays) { this.extensions = extensions == null ? Collections.emptyMap() : new HashMap<>(extensions); + this.allowArraysOfArrays = allowArraysOfArrays; } private List scriptSources; @@ -1554,10 +1556,17 @@ AST SYMBOL(boolean allowComparison, boolean allowInKeyword) throws IOException { lexer(); AST idxAst = ARRAY_INDEX(true, allowInKeyword); lexer(Token.CLOSE_BRACKET); - if (token == Token.OPEN_BRACKET) { + AST arrayReference = symbolTable.addArrayReference(id, idxAst); + if (!allowArraysOfArrays && token == Token.OPEN_BRACKET) { throw parserException("Use [a,b,c,...] instead of [a][b][c]... for multi-dimensional arrays."); } - return symbolTable.addArrayReference(id, idxAst); + while (allowArraysOfArrays && token == Token.OPEN_BRACKET) { + lexer(); + idxAst = ARRAY_INDEX(true, allowInKeyword); + lexer(Token.CLOSE_BRACKET); + arrayReference = new ArrayReferenceAst(arrayReference, idxAst); + } + return arrayReference; } return symbolTable.addID(id); } @@ -1747,21 +1756,15 @@ AST FOR_STATEMENT() throws IOException { } // in lexer(); - // id if (token != Token.ID) { throw parserException( - "Expecting an ARRAY Token.ID for 'in' statement. Got " + token.name() + ": " + text); + "Expecting an array or subarray for 'in' statement. Got " + token.name() + ": " + text); } - String arrId = text.toString(); - - // not an indexed array reference! - AST arrayIdAst = symbolTable.addArrayID(arrId); - - lexer(); + AST arrayAst = SYMBOL(true, true); // close paren ... 
lexer(Token.CLOSE_PAREN); AST block = BLOCK_OR_STMT(); - return new ForInStatementAst(expr1, arrayIdAst, block); + return new ForInStatementAst(expr1, arrayAst, block); } if (token == Token.SEMICOLON) { @@ -2051,6 +2054,69 @@ private void expectKeyword(String keyword) throws IOException { } } + private void populateArrayOperandTuples( + AST arrayAst, + AwkTuples tuples, + boolean createIfMissing, + String errorMessage) { + if (arrayAst instanceof IDAst) { + IDAst idAst = (IDAst) arrayAst; + if (idAst.isScalar()) { + arrayAst.throwSemanticException(errorMessage); + } + idAst.setArray(true); + idAst.populateTuples(tuples); + return; + } + if (arrayAst instanceof ArrayReferenceAst) { + ((ArrayReferenceAst) arrayAst).populateArrayValueTuples(tuples, createIfMissing); + return; + } + arrayAst.throwSemanticException(errorMessage); + } + + private int populateActualParameters( + AwkTuples tuples, + FunctionCallParamListAst params, + Set arrayParameterIndexes, + int parameterIndex) { + if (params == null) { + return 0; + } + if (arrayParameterIndexes.contains(Integer.valueOf(parameterIndex))) { + populateArrayOperandTuples( + params.getAst1(), + tuples, + true, + "Parameter position " + parameterIndex + " must be an array or subarray."); + } else { + params.getAst1().populateTuples(tuples); + } + if (params.getAst2() == null) { + return 1; + } + return 1 + populateActualParameters( + tuples, + (FunctionCallParamListAst) params.getAst2(), + arrayParameterIndexes, + parameterIndex + 1); + } + + private Set collectArrayParameterIndexes(FunctionDefAst functionDefAst) { + Set arrayIndexes = new HashSet(); + FunctionDefParamListAst fPtr = (FunctionDefParamListAst) functionDefAst.getAst1(); + int index = 0; + while (fPtr != null) { + IDAst fparam = symbolTable.getFunctionParameterIDAST(functionDefAst.id, fPtr.id); + if (fparam.isArray()) { + arrayIndexes.add(Integer.valueOf(index)); + } + fPtr = (FunctionDefParamListAst) fPtr.getAst1(); + index++; + } + return arrayIndexes; + } + 
// parser // =============================================================================== // AST class defs @@ -2996,15 +3062,9 @@ public Address continueAddress() { public int populateTuples(AwkTuples tuples) { pushSourceLineNumber(tuples); - IDAst arrayIdAst = (IDAst) getAst2(); - if (arrayIdAst.isScalar()) { - throw new SemanticException(arrayIdAst + " is not an array"); - } - arrayIdAst.setArray(true); - breakAddress = tuples.createAddress("breakAddress"); - getAst2().populateTuples(tuples); + populateArrayOperandTuples(getAst2(), tuples, true, getAst2() + " is not an array"); // pops the array and pushes the keyset tuples.keylist(); @@ -3182,27 +3242,28 @@ public int populateTuples(AwkTuples tuples) { } } else if (getAst1() instanceof ArrayReferenceAst) { ArrayReferenceAst arr = (ArrayReferenceAst) getAst1(); - // push the index - arr.getAst2().populateTuples(tuples); // push the array ref itself - IDAst idAst = (IDAst) arr.getAst1(); - if (idAst.isScalar()) { - throw new SemanticException("Cannot use " + idAst + " as an array. It is a scalar."); + if (arr.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arr.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("Cannot use " + idAst + " as an array. 
It is a scalar."); + } + idAst.setArray(true); } - idAst.setArray(true); + arr.populateTargetReferenceTuples(tuples); if (op == Token.EQUALS) { - tuples.assignArray(idAst.offset, idAst.isGlobal); + tuples.assignMapElement(); } else if (op == Token.PLUS_EQ) { - tuples.plusEqArray(idAst.offset, idAst.isGlobal); + tuples.plusEqMapElement(); } else if (op == Token.MINUS_EQ) { - tuples.minusEqArray(idAst.offset, idAst.isGlobal); + tuples.minusEqMapElement(); } else if (op == Token.MULT_EQ) { - tuples.multEqArray(idAst.offset, idAst.isGlobal); + tuples.multEqMapElement(); } else if (op == Token.DIV_EQ) { - tuples.divEqArray(idAst.offset, idAst.isGlobal); + tuples.divEqMapElement(); } else if (op == Token.MOD_EQ) { - tuples.modEqArray(idAst.offset, idAst.isGlobal); + tuples.modEqMapElement(); } else if (op == Token.POW_EQ) { - tuples.powEqArray(idAst.offset, idAst.isGlobal); + tuples.powEqMapElement(); } else { throw new NotImplementedError("Unhandled op: " + op + " / " + text + " for arrays."); } @@ -3343,17 +3404,12 @@ private InExpressionAst(AST arg, AST arr) { @Override public int populateTuples(AwkTuples tuples) { pushSourceLineNumber(tuples); - if (!(getAst2() instanceof IDAst)) { + if (!(getAst2() instanceof IDAst) && !(getAst2() instanceof ArrayReferenceAst)) { throw new SemanticException("Expecting an array for rhs of IN. Got an expression."); } - IDAst arrAst = (IDAst) getAst2(); - if (arrAst.isScalar()) { - throw new SemanticException("Expecting an array for rhs of IN. Got a scalar."); - } - arrAst.setArray(true); getAst1().populateTuples(tuples); - arrAst.populateTuples(tuples); + populateArrayOperandTuples(getAst2(), tuples, true, "Expecting an array for rhs of IN. 
Got a scalar."); tuples.isIn(); popSourceLineNumber(tuples); @@ -3722,26 +3778,30 @@ void checkActualToFormalParameters(AST actualParamList) { // formal function parameter AST fparam = symbolTable.getFunctionParameterIDAST(id, fPtr.id); - if (aparam.isArray() && fparam.isScalar()) { - aparam - .throwSemanticException( - id + ": Actual parameter (" + aparam + ") is an array, but formal parameter is used like a scalar."); - } - if (aparam.isScalar() && fparam.isArray()) { - aparam - .throwSemanticException( - id + ": Actual parameter (" + aparam + ") is a scalar, but formal parameter is used like an array."); - } - // condition parameters appropriately - // (based on function parameter semantics) - if (aparam instanceof IDAst) { - IDAst aparamIdAst = (IDAst) aparam; - if (fparam.isScalar()) { - aparamIdAst.setScalar(true); - } - if (fparam.isArray()) { + if (fparam.isArray()) { + if (aparam instanceof IDAst) { + IDAst aparamIdAst = (IDAst) aparam; + if (aparamIdAst.isScalar()) { + aparam + .throwSemanticException( + id + ": Actual parameter (" + aparam + + ") is a scalar, but formal parameter is used like an array."); + } aparamIdAst.setArray(true); + } else if (!(aparam instanceof ArrayReferenceAst)) { + aparam + .throwSemanticException( + id + ": Actual parameter (" + aparam + ") is not an array or subarray reference."); } + } else if (fparam.isScalar() && aparam instanceof IDAst) { + IDAst aparamIdAst = (IDAst) aparam; + if (aparamIdAst.isArray()) { + aparam + .throwSemanticException( + id + ": Actual parameter (" + aparam + + ") is an array, but formal parameter is used like a scalar."); + } + aparamIdAst.setScalar(true); } // next aPtr = aPtr.getAst2(); @@ -3819,7 +3879,11 @@ public int populateTuples(AwkTuples tuples) { if (getAst1() == null) { actualParamCountLocal = 0; } else { - actualParamCountLocal = getAst1().populateTuples(tuples); + actualParamCountLocal = populateActualParameters( + tuples, + (FunctionCallParamListAst) getAst1(), + 
collectArrayParameterIndexes(functionProxy.functionDefAst), + 0); } int formalParamCount = functionProxy.getFunctionParamCount(); if (formalParamCount < actualParamCountLocal) { @@ -3994,8 +4058,24 @@ public int populateTuples(AwkTuples tuples) { throw new SemanticException("sub needs at least 2 arguments"); } boolean isGsub = fIdx == BUILTIN_FUNC_NAMES.get("gsub"); + int numargs = 0; + for (AST paramPtr = getAst1(); paramPtr != null; paramPtr = paramPtr.getAst2()) { + numargs++; + } + if (numargs != 2 && numargs != 3) { + throw new SemanticException("sub requires 2 or 3 arguments, not " + numargs); + } - int numargs = getAst1().populateTuples(tuples); + getAst1().getAst1().populateTuples(tuples); + getAst1().getAst2().getAst1().populateTuples(tuples); + if (numargs == 3) { + AST targetAst = getAst1().getAst2().getAst2().getAst1(); + if (targetAst instanceof ArrayReferenceAst) { + ((ArrayReferenceAst) targetAst).populateTargetValueTuples(tuples); + } else { + targetAst.populateTuples(tuples); + } + } // stack contains arg1,arg2[,arg3] - in that pop() order @@ -4012,13 +4092,15 @@ public int populateTuples(AwkTuples tuples) { tuples.subForVariable(idAst.offset, idAst.isGlobal, isGsub); } else if (ptr instanceof ArrayReferenceAst) { ArrayReferenceAst arrAst = (ArrayReferenceAst) ptr; - // push the index - arrAst.getAst2().populateTuples(tuples); - IDAst idAst = (IDAst) arrAst.getAst1(); - if (idAst.isScalar()) { - throw new SemanticException("Cannot use " + idAst + " as an array."); + if (arrAst.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arrAst.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("Cannot use " + idAst + " as an array."); + } + idAst.setArray(true); } - tuples.subForArrayReference(idAst.offset, idAst.isGlobal, isGsub); + arrAst.populateTargetReferenceTuples(tuples); + tuples.subForMapReference(isGsub); } else if (ptr instanceof DollarExpressionAst) { // push the field ref DollarExpressionAst dollarExpr = (DollarExpressionAst) 
ptr; @@ -4041,19 +4123,34 @@ public int populateTuples(AwkTuples tuples) { throw new SemanticException("split needs at least 2 arguments"); } AST ptr = getAst1().getAst2().getAst1(); - if (!(ptr instanceof IDAst)) { - throw new SemanticException("split needs an array name as its 2nd argument"); + if (!(ptr instanceof IDAst) && !(ptr instanceof ArrayReferenceAst)) { + throw new SemanticException("split needs an array or subarray reference as its 2nd argument"); } - IDAst arrAst = (IDAst) ptr; - if (arrAst.isScalar()) { - throw new SemanticException("split's 2nd arg cannot be a scalar"); + if (ptr instanceof IDAst) { + IDAst arrAst = (IDAst) ptr; + if (arrAst.isScalar()) { + throw new SemanticException("split's 2nd arg cannot be a scalar"); + } + arrAst.setArray(true); } - arrAst.setArray(true); - int ast1Result = getAst1().populateTuples(tuples); + int ast1Result = 0; + for (AST paramPtr = getAst1(); paramPtr != null; paramPtr = paramPtr.getAst2()) { + ast1Result++; + } if (ast1Result != 2 && ast1Result != 3) { throw new SemanticException("split requires 2 or 3 arguments, not " + ast1Result); } + + getAst1().getAst1().populateTuples(tuples); + populateArrayOperandTuples( + ptr, + tuples, + true, + "split's 2nd arg must be an array or subarray reference"); + if (ast1Result == 3) { + getAst1().getAst2().getAst2().getAst1().populateTuples(tuples); + } tuples.split(ast1Result); popSourceLineNumber(tuples); return 1; @@ -4304,6 +4401,41 @@ public int populateTuples(AwkTuples tuples) { popSourceLineNumber(tuples); return 1; } + + private void populateTargetReferenceTuples(AwkTuples tuples) { + pushSourceLineNumber(tuples); + populateContainerTuples(tuples); + getAst2().populateTuples(tuples); + popSourceLineNumber(tuples); + } + + private void populateArrayValueTuples(AwkTuples tuples, boolean createIfMissing) { + pushSourceLineNumber(tuples); + populateContainerTuples(tuples); + getAst2().populateTuples(tuples); + if (createIfMissing) { + tuples.ensureArrayElement(); + } 
else { + tuples.dereferenceArray(); + } + popSourceLineNumber(tuples); + } + + private void populateTargetValueTuples(AwkTuples tuples) { + pushSourceLineNumber(tuples); + populateContainerTuples(tuples); + getAst2().populateTuples(tuples); + tuples.dereferenceArray(); + popSourceLineNumber(tuples); + } + + private void populateContainerTuples(AwkTuples tuples) { + if (getAst1() instanceof ArrayReferenceAst) { + ((ArrayReferenceAst) getAst1()).populateArrayValueTuples(tuples, true); + } else { + getAst1().populateTuples(tuples); + } + } } private final class IntegerAst extends ScalarExpressionAst { @@ -4473,9 +4605,15 @@ public int populateTuples(AwkTuples tuples) { tuples.inc(idAst.offset, idAst.isGlobal); } else if (getAst1() instanceof ArrayReferenceAst) { ArrayReferenceAst arrAst = (ArrayReferenceAst) getAst1(); - IDAst idAst = (IDAst) arrAst.getAst1(); - arrAst.getAst2().populateTuples(tuples); - tuples.incArrayRef(idAst.offset, idAst.isGlobal); + if (arrAst.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arrAst.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("Cannot use " + idAst + " as an array."); + } + idAst.setArray(true); + } + arrAst.populateTargetReferenceTuples(tuples); + tuples.incMapRef(); } else if (getAst1() instanceof DollarExpressionAst) { DollarExpressionAst dollarExpr = (DollarExpressionAst) getAst1(); dollarExpr.getAst1().populateTuples(tuples); // OPTIMIATION: duplicate the x in $x here @@ -4515,9 +4653,15 @@ public int populateTuples(AwkTuples tuples) { tuples.dec(idAst.offset, idAst.isGlobal); } else if (getAst1() instanceof ArrayReferenceAst) { ArrayReferenceAst arrAst = (ArrayReferenceAst) getAst1(); - IDAst idAst = (IDAst) arrAst.getAst1(); - arrAst.getAst2().populateTuples(tuples); - tuples.decArrayRef(idAst.offset, idAst.isGlobal); + if (arrAst.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arrAst.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("Cannot use " + idAst + " as an 
array."); + } + idAst.setArray(true); + } + arrAst.populateTargetReferenceTuples(tuples); + tuples.decMapRef(); } else if (getAst1() instanceof DollarExpressionAst) { DollarExpressionAst dollarExpr = (DollarExpressionAst) getAst1(); dollarExpr.getAst1().populateTuples(tuples); // OPTIMIATION: duplicate the x in $x here @@ -4555,15 +4699,25 @@ public int populateTuples(AwkTuples tuples) { dollarExpr.getAst1().populateTuples(tuples); tuples.incDollarRef(); } else { - getAst1().populateTuples(tuples); + if (getAst1() instanceof ArrayReferenceAst) { + ((ArrayReferenceAst) getAst1()).populateTargetValueTuples(tuples); + } else { + getAst1().populateTuples(tuples); + } if (getAst1() instanceof IDAst) { IDAst idAst = (IDAst) getAst1(); tuples.postInc(idAst.offset, idAst.isGlobal); } else if (getAst1() instanceof ArrayReferenceAst) { ArrayReferenceAst arrAst = (ArrayReferenceAst) getAst1(); - IDAst idAst = (IDAst) arrAst.getAst1(); - arrAst.getAst2().populateTuples(tuples); - tuples.incArrayRef(idAst.offset, idAst.isGlobal); + if (arrAst.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arrAst.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("Cannot use " + idAst + " as an array."); + } + idAst.setArray(true); + } + arrAst.populateTargetReferenceTuples(tuples); + tuples.incMapRef(); } else { throw new NotImplementedError("unhandled postinc for " + getAst1()); } @@ -4582,15 +4736,25 @@ private PostDecAst(AST symbolAst) { @Override public int populateTuples(AwkTuples tuples) { pushSourceLineNumber(tuples); - getAst1().populateTuples(tuples); + if (getAst1() instanceof ArrayReferenceAst) { + ((ArrayReferenceAst) getAst1()).populateTargetValueTuples(tuples); + } else { + getAst1().populateTuples(tuples); + } if (getAst1() instanceof IDAst) { IDAst idAst = (IDAst) getAst1(); tuples.postDec(idAst.offset, idAst.isGlobal); } else if (getAst1() instanceof ArrayReferenceAst) { ArrayReferenceAst arrAst = (ArrayReferenceAst) getAst1(); - IDAst idAst = (IDAst) 
arrAst.getAst1(); - arrAst.getAst2().populateTuples(tuples); - tuples.decArrayRef(idAst.offset, idAst.isGlobal); + if (arrAst.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arrAst.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("Cannot use " + idAst + " as an array."); + } + idAst.setArray(true); + } + arrAst.populateTargetReferenceTuples(tuples); + tuples.decMapRef(); } else if (getAst1() instanceof DollarExpressionAst) { DollarExpressionAst dollarExpr = (DollarExpressionAst) getAst1(); dollarExpr.getAst1().populateTuples(tuples); @@ -4676,13 +4840,11 @@ public int populateTuples(AwkTuples tuples) { if (getAst1() == null) { paramCount = 0; } else { + Set arrayIndexes = new HashSet(); for (int idx : reqArrayIdxs) { - AST paramAst = getParamAst((FunctionCallParamListAst) getAst1(), idx); - // if the parameter is an IDAst... - if (paramAst.getAst1() instanceof IDAst) { - // then force it to be an array, - // or complain if it is already tagged as a scalar - IDAst idAst = (IDAst) paramAst.getAst1(); + AST paramAst = getParamAst((FunctionCallParamListAst) getAst1(), idx).getAst1(); + if (paramAst instanceof IDAst) { + IDAst idAst = (IDAst) paramAst; if (idAst.isScalar()) { throw new SemanticException( "Extension '" @@ -4692,10 +4854,17 @@ public int populateTuples(AwkTuples tuples) { + " be an associative array, not a scalar."); } idAst.setArray(true); + arrayIndexes.add(Integer.valueOf(idx)); + } else if (paramAst instanceof ArrayReferenceAst) { + arrayIndexes.add(Integer.valueOf(idx)); } } - paramCount = getAst1().populateTuples(tuples); + paramCount = populateActualParameters( + tuples, + (FunctionCallParamListAst) getAst1(), + arrayIndexes, + 0); } // isInitial == true :: // retval of this extension is not a function parameter @@ -4823,10 +4992,15 @@ public int populateTuples(AwkTuples tuples) { } } else if (getAst2() instanceof ArrayReferenceAst) { ArrayReferenceAst arr = (ArrayReferenceAst) getAst2(); - // push the index - 
arr.getAst2().populateTuples(tuples); // push the array ref itself - IDAst idAst = (IDAst) arr.getAst1(); - tuples.assignArray(idAst.offset, idAst.isGlobal); + if (arr.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arr.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("Cannot use " + idAst + " as an array."); + } + idAst.setArray(true); + } + arr.populateTargetReferenceTuples(tuples); + tuples.assignMapElement(); } else if (getAst2() instanceof DollarExpressionAst) { DollarExpressionAst dollarExpr = (DollarExpressionAst) getAst2(); if (dollarExpr.getAst2() != null) { @@ -4899,13 +5073,16 @@ public int populateTuples(AwkTuples tuples) { pushSourceLineNumber(tuples); if (getAst1() instanceof ArrayReferenceAst) { - IDAst idAst = (IDAst) getAst1().getAst1(); - if (idAst.isScalar()) { - throw new SemanticException("delete: Cannot use a scalar as an array."); + ArrayReferenceAst arrAst = (ArrayReferenceAst) getAst1(); + if (arrAst.getAst1() instanceof IDAst) { + IDAst idAst = (IDAst) arrAst.getAst1(); + if (idAst.isScalar()) { + throw new SemanticException("delete: Cannot use a scalar as an array."); + } + idAst.setArray(true); } - idAst.setArray(true); - getAst1().getAst2().populateTuples(tuples); // idx on the stack - tuples.deleteArrayElement(idAst.offset, idAst.isGlobal); + arrAst.populateTargetReferenceTuples(tuples); + tuples.deleteMapElement(); } else if (getAst1() instanceof IDAst) { IDAst idAst = (IDAst) getAst1(); if (idAst.isScalar()) { diff --git a/src/main/java/io/jawk/intermediate/AwkTuples.java b/src/main/java/io/jawk/intermediate/AwkTuples.java index 4d5394c9..016f70f6 100644 --- a/src/main/java/io/jawk/intermediate/AwkTuples.java +++ b/src/main/java/io/jawk/intermediate/AwkTuples.java @@ -327,6 +327,13 @@ public void assignArray(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.ASSIGN_ARRAY, offset, isGlobal)); } + /** + * Assigns a value to a stack-provided associative-array element. 
+ */ + public void assignMapElement() { + queue.add(new Tuple(Opcode.ASSIGN_MAP_ELEMENT)); + } + /** *

* assignAsInput. @@ -451,6 +458,13 @@ public void plusEqArray(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.PLUS_EQ_ARRAY, offset, isGlobal)); } + /** + * Applies {@code +=} to a stack-provided associative-array element. + */ + public void plusEqMapElement() { + queue.add(new Tuple(Opcode.PLUS_EQ_MAP_ELEMENT)); + } + /** *

* minusEqArray. @@ -463,6 +477,13 @@ public void minusEqArray(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.MINUS_EQ_ARRAY, offset, isGlobal)); } + /** + * Applies {@code -=} to a stack-provided associative-array element. + */ + public void minusEqMapElement() { + queue.add(new Tuple(Opcode.MINUS_EQ_MAP_ELEMENT)); + } + /** *

* multEqArray. @@ -475,6 +496,13 @@ public void multEqArray(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.MULT_EQ_ARRAY, offset, isGlobal)); } + /** + * Applies {@code *=} to a stack-provided associative-array element. + */ + public void multEqMapElement() { + queue.add(new Tuple(Opcode.MULT_EQ_MAP_ELEMENT)); + } + /** *

* divEqArray. @@ -487,6 +515,13 @@ public void divEqArray(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.DIV_EQ_ARRAY, offset, isGlobal)); } + /** + * Applies {@code /=} to a stack-provided associative-array element. + */ + public void divEqMapElement() { + queue.add(new Tuple(Opcode.DIV_EQ_MAP_ELEMENT)); + } + /** *

* modEqArray. @@ -499,6 +534,13 @@ public void modEqArray(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.MOD_EQ_ARRAY, offset, isGlobal)); } + /** + * Applies {@code %=} to a stack-provided associative-array element. + */ + public void modEqMapElement() { + queue.add(new Tuple(Opcode.MOD_EQ_MAP_ELEMENT)); + } + /** *

* powEqArray. @@ -511,6 +553,14 @@ public void powEqArray(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.POW_EQ_ARRAY, offset, isGlobal)); } + /** + * Applies exponentiation assignment to a stack-provided associative-array + * element. + */ + public void powEqMapElement() { + queue.add(new Tuple(Opcode.POW_EQ_MAP_ELEMENT)); + } + /** *

* plusEqInputField. @@ -714,6 +764,16 @@ public void subForArrayReference(int offset, boolean isGlobal, boolean isGsub) { queue.add(new Tuple(Opcode.SUB_FOR_ARRAY_REFERENCE, offset, isGlobal, isGsub)); } + /** + * Applies {@code sub}/{@code gsub} to a stack-provided associative-array + * element. + * + * @param isGsub {@code true} for {@code gsub}, {@code false} for {@code sub} + */ + public void subForMapReference(boolean isGsub) { + queue.add(new Tuple(Opcode.SUB_FOR_MAP_REFERENCE, isGsub)); + } + /** *

* split. @@ -886,6 +946,13 @@ public void incArrayRef(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.INC_ARRAY_REF, offset, isGlobal)); } + /** + * Increments a stack-provided associative-array element reference. + */ + public void incMapRef() { + queue.add(new Tuple(Opcode.INC_MAP_REF)); + } + /** *

* decArrayRef. @@ -898,6 +965,13 @@ public void decArrayRef(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.DEC_ARRAY_REF, offset, isGlobal)); } + /** + * Decrements a stack-provided associative-array element reference. + */ + public void decMapRef() { + queue.add(new Tuple(Opcode.DEC_MAP_REF)); + } + /** *

* incDollarRef. @@ -997,6 +1071,14 @@ public void dereferenceArray() { queue.add(new Tuple(Opcode.DEREF_ARRAY)); } + /** + * Dereferences an associative-array element as a nested array, creating it if + * needed. + */ + public void ensureArrayElement() { + queue.add(new Tuple(Opcode.ENSURE_ARRAY_ELEMENT)); + } + /** *

* key list. @@ -1526,6 +1608,13 @@ public void deleteArrayElement(int offset, boolean isGlobal) { queue.add(new Tuple(Opcode.DELETE_ARRAY_ELEMENT, offset, isGlobal)); } + /** + * Deletes a stack-provided associative-array element. + */ + public void deleteMapElement() { + queue.add(new Tuple(Opcode.DELETE_MAP_ELEMENT)); + } + /** *

* deleteArray. diff --git a/src/main/java/io/jawk/intermediate/Opcode.java b/src/main/java/io/jawk/intermediate/Opcode.java index 895041d0..7fa6b783 100644 --- a/src/main/java/io/jawk/intermediate/Opcode.java +++ b/src/main/java/io/jawk/intermediate/Opcode.java @@ -217,6 +217,14 @@ public enum Opcode { * Stack after: item ... */ ASSIGN_ARRAY, + /** + * Assigns an item to an element of the associative array currently on the stack. + * The item remains on the stack. + *

+ * Stack before: associative-array array-index item ...
+ * Stack after: item ... + */ + ASSIGN_MAP_ELEMENT, /** * Assigns the top-of-stack to $0. The contents of the stack are unaffected. * Upon assignment, individual field variables are recalculated. @@ -392,6 +400,60 @@ public enum Opcode { * Stack after: x^n ... */ POW_EQ_ARRAY, + /** + * Increase the contents of a stack-provided associative-array element by an + * adjustment value; assigns the result to the array and pushes the result onto + * the stack. + *

+ * Stack before: associative-array array-idx n ...
+ * Stack after: x+n ... + */ + PLUS_EQ_MAP_ELEMENT, + /** + * Decreases the contents of a stack-provided associative-array element by an + * adjustment value; assigns the result to the array and pushes the result onto + * the stack. + *

+ * Stack before: associative-array array-idx n ...
+ * Stack after: x-n ... + */ + MINUS_EQ_MAP_ELEMENT, + /** + * Multiplies the contents of a stack-provided associative-array element by an + * adjustment value; assigns the result to the array and pushes the result onto + * the stack. + *

+ * Stack before: associative-array array-idx n ...
+ * Stack after: x*n ... + */ + MULT_EQ_MAP_ELEMENT, + /** + * Divides the contents of a stack-provided associative-array element by an + * adjustment value; assigns the result to the array and pushes the result onto + * the stack. + *

+ * Stack before: associative-array array-idx n ...
+ * Stack after: x/n ... + */ + DIV_EQ_MAP_ELEMENT, + /** + * Takes the modulus of the contents of a stack-provided associative-array + * element by an adjustment value; assigns the result to the array and pushes the + * result onto the stack. + *

+ * Stack before: associative-array array-idx n ...
+ * Stack after: x%n ... + */ + MOD_EQ_MAP_ELEMENT, + /** + * Raises the contents of a stack-provided associative-array element to the + * power of an adjustment value; assigns the result to the array and pushes the + * result onto the stack. + *

+ * Stack before: associative-array array-idx n ...
+ * Stack after: x^n ... + */ + POW_EQ_MAP_ELEMENT, /** * Increases the contents of an input field by an adjustment value; * assigns the result to the input field and pushes the result onto the stack. @@ -592,6 +654,16 @@ public enum Opcode { * Stack after: ... */ SUB_FOR_ARRAY_REFERENCE, + /** + * Built-in function that substitutes an occurrence (or all occurrences) of a + * string in a particular stack-provided array cell and replaces it with another. + *

+ * Argument 1: {@code true} for {@code gsub} (replace all occurrences), {@code false} for {@code sub}

+ * Stack before: associative-array array-index regexp replacement-string orig-string ...
+ * Stack after: ... + */ + SUB_FOR_MAP_REFERENCE, /** * Built-in function to split a string by a regexp and put the * components into an array. @@ -757,6 +829,20 @@ public enum Opcode { * Stack after: x-1 ... */ DEC_ARRAY_REF, + /** + * Increases the stack-provided array element reference by one. + *

+ * Stack before: associative-array array-idx ...
+ * Stack after: x+1 ... + */ + INC_MAP_REF, + /** + * Decreases the stack-provided array element reference by one. + *

+ * Stack before: associative-array array-idx ...
+ * Stack after: x-1 ... + */ + DEC_MAP_REF, /** * Increases the input field variable by one; pushes the result * onto the stack. @@ -830,6 +916,14 @@ public enum Opcode { /** Constant DEREF_ARRAY=336 */ DEREF_ARRAY, + /** + * Dereferences an associative-array element as an array, creating a nested + * array when the element is currently blank or uninitialized. + *

+ * Stack before: associative-array array-index ...
+ * Stack after: nested-associative-array ... + */ + ENSURE_ARRAY_ELEMENT, // for (x in y) {keyset} support /** @@ -1148,6 +1242,13 @@ public enum Opcode { * Stack after: ... */ DELETE_ARRAY_ELEMENT, + /** + * Deletes an entry in a stack-provided associative array. + *

+ * Stack before: associative-array array-index ...
+ * Stack after: ... + */ + DELETE_MAP_ELEMENT, /** * Internal. diff --git a/src/main/java/io/jawk/util/AwkSettings.java b/src/main/java/io/jawk/util/AwkSettings.java index 985a4110..2d5d9047 100644 --- a/src/main/java/io/jawk/util/AwkSettings.java +++ b/src/main/java/io/jawk/util/AwkSettings.java @@ -84,6 +84,12 @@ public class AwkSettings { */ private volatile boolean useSortedArrayKeys = false; + /** + * Whether to accept gawk-style arrays of arrays syntax such as {@code a[i][j]}. + * true by default. + */ + private volatile boolean allowArraysOfArrays = true; + /** * Locale for the output of numbers * US-English by default. @@ -119,6 +125,7 @@ public String toDescriptionString() { desc.append("variables = ").append(getVariables()).append(newLine); desc.append("fieldSeparator = ").append(getFieldSeparator()).append(newLine); desc.append("useSortedArrayKeys = ").append(isUseSortedArrayKeys()).append(newLine); + desc.append("allowArraysOfArrays = ").append(isAllowArraysOfArrays()).append(newLine); return desc.toString(); } @@ -136,6 +143,9 @@ public String toExtensionDescription() { if (isUseSortedArrayKeys()) { extensions.append(", associative array keys are sorted"); } + if (isAllowArraysOfArrays()) { + extensions.append(", arrays of arrays"); + } if (extensions.length() > 0) { return "{extensions: " + extensions.substring(2) + "}"; } else { @@ -267,6 +277,26 @@ public void setUseSortedArrayKeys(boolean useSortedArrayKeys) { markModified(); } + /** + * Whether to accept gawk-style arrays of arrays syntax such as {@code a[i][j]}. + * + * @return {@code true} when arrays of arrays are enabled at compile time + */ + public boolean isAllowArraysOfArrays() { + return allowArraysOfArrays; + } + + /** + * Enables or disables gawk-style arrays of arrays syntax such as + * {@code a[i][j]}. 
+ * + * @param allowArraysOfArrays {@code true} to accept arrays-of-arrays syntax + */ + public void setAllowArraysOfArrays(boolean allowArraysOfArrays) { + this.allowArraysOfArrays = allowArraysOfArrays; + markModified(); + } + /** *

* Getter for the field locale. @@ -339,6 +369,11 @@ public void setUseSortedArrayKeys(boolean useSortedArrayKeys) { throw unsupported(); } + @Override + public void setAllowArraysOfArrays(boolean allowArraysOfArrays) { + throw unsupported(); + } + @Override public void setLocale(Locale pLocale) { throw unsupported(); diff --git a/src/site/markdown/compatibility.md b/src/site/markdown/compatibility.md index 3f614c9e..900d6771 100644 --- a/src/site/markdown/compatibility.md +++ b/src/site/markdown/compatibility.md @@ -14,6 +14,7 @@ Jawk keeps the AWK language model while adding JVM-oriented capabilities: - it runs entirely in Java and can be embedded directly in applications - it exposes a serializable tuple representation for compilation and reuse - it can maintain associative array keys in sorted order +- it supports gawk-style arrays of arrays (`a[i][j]`) in addition to classic AWK multi-dimensional subscripts (`a[i, j]`) - it supports explicit extensions - it offers a sandboxed compiler and runtime diff --git a/src/site/markdown/java-compile.md b/src/site/markdown/java-compile.md index ae66757b..6d55237e 100644 --- a/src/site/markdown/java-compile.md +++ b/src/site/markdown/java-compile.md @@ -66,6 +66,16 @@ awk.script(program) This keeps compilation and execution separate, which is useful when the same AWK program is reused across multiple inputs. +Compilation settings matter here. For example, gawk-style arrays of arrays (`a[i][j]`) are accepted by default, but you can disable that syntax before compiling: + +```java +AwkSettings settings = new AwkSettings(); +settings.setAllowArraysOfArrays(false); + +Awk awk = new Awk(settings); +AwkProgram program = awk.compile("{ print a[1,2] }"); +``` + ## Choosing the Right Reuse Strategy - Use `eval(String...)` when the expression is cheap and called only occasionally. 
diff --git a/src/site/markdown/java.md b/src/site/markdown/java.md index 0987fd59..be12a84d 100644 --- a/src/site/markdown/java.md +++ b/src/site/markdown/java.md @@ -36,12 +36,22 @@ Awk awk = new Awk(settings); | `setLocale(Locale)` | `Locale.US` | Locale for numeric output formatting | | `setDefaultRS(String)` | Platform line separator | Default value for `RS`, the record separator | | `setUseSortedArrayKeys(boolean)` | `false` | Whether to keep associative array keys in sorted order | +| `setAllowArraysOfArrays(boolean)` | `true` | Whether the compiler accepts gawk-style nested array syntax such as `a[i][j]` | | `putVariable(String, Object)` | Empty map | Pre-set variables available before `BEGIN` | Output destination is specified per-call on the builder (`execute()`, `execute(PrintStream)`, `execute(OutputStream)`, `execute(Appendable)`, or `execute(AwkSink)`). See the [Custom Output](java-output.html) guide for details. For more on passing variables to scripts, see [Variables and Arguments](java-variables.html). +By default, Jawk accepts both classic multi-dimensional array syntax (`a[i, j]`) and gawk-style arrays of arrays (`a[i][j]`). 
Disable the gawk-style parser mode when you need strict classic AWK parsing: + +```java +AwkSettings settings = new AwkSettings(); +settings.setAllowArraysOfArrays(false); + +Awk awk = new Awk(settings); +``` + Construct it with extension instances when you want those functions available to the script: ```java diff --git a/src/test/java/io/jawk/AwkParserTest.java b/src/test/java/io/jawk/AwkParserTest.java index 53f733a7..ce7699e9 100644 --- a/src/test/java/io/jawk/AwkParserTest.java +++ b/src/test/java/io/jawk/AwkParserTest.java @@ -28,6 +28,8 @@ import java.io.InputStream; import org.junit.Test; import io.jawk.frontend.ast.LexerException; +import io.jawk.frontend.ast.ParserException; +import io.jawk.util.AwkSettings; public class AwkParserTest { @@ -191,6 +193,15 @@ public void testPowAssignment() throws Exception { .runAndAssert(); } + @Test + public void testArraysOfArraysCanBeDisabled() { + AwkSettings settings = new AwkSettings(); + settings.setAllowArraysOfArrays(false); + Awk awk = new Awk(settings); + + assertThrows(ParserException.class, () -> awk.compile("BEGIN { a[1][2] = 42 }")); + } + @Test public void testOperatorPrecedence() throws Exception { AwkTestSupport diff --git a/src/test/java/io/jawk/AwkTest.java b/src/test/java/io/jawk/AwkTest.java index 793a9e9b..1c0fbb75 100644 --- a/src/test/java/io/jawk/AwkTest.java +++ b/src/test/java/io/jawk/AwkTest.java @@ -794,6 +794,123 @@ public void testSubsepChangeAfterIndexCreation() throws Exception { .runAndAssert(); } + @Test + public void testArraysOfArraysAssignmentAndLookup() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays assignment and lookup") + .script("BEGIN { a[1][2] = 42; print a[1][2] }") + .expect("42\n") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysSupportsMixedCommaSubscripts() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays mixed with comma subscripts") + .script("BEGIN { SUBSEP = \"@\"; a[1][2,3] = 42; print a[1][2 SUBSEP 3]; print 
a[1][2,3] }") + .expect("42\n42\n") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysRejectScalarAsArray() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays reject scalar as array") + .script("BEGIN { a[1] = 5; print a[1][2] }") + .expectThrow(RuntimeException.class) + .runAndAssert(); + } + + @Test + public void testArraysOfArraysMembershipDeleteAndIteration() throws Exception { + AwkSettings settings = new AwkSettings(); + settings.setUseSortedArrayKeys(true); + + AwkTestSupport + .awkTest("arrays of arrays membership delete and iteration") + .withAwk(new Awk(settings)) + .script( + "BEGIN { a[1][\"x\"] = 1; a[1][\"y\"] = 2; print ((\"x\" in a[1]) ? \"yes\" : \"no\"); delete a[1][\"x\"]; print ((\"x\" in a[1]) ? \"yes\" : \"no\"); for (k in a[1]) print k \":\" a[1][k] }") + .expectLines("yes", "no", "y:2") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysSupportsNestedIncrement() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays nested increment") + .script("BEGIN { old = a[1][2]++; print old, a[1][2]; ++a[1][2]; print a[1][2] }") + .expectLines(" 1", "2") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysSupportsSplitIntoSubarray() throws Exception { + AwkTestSupport + .awkTest("split into subarray") + .script("BEGIN { print split(\"x y\", a[1]), a[1][1], a[1][2] }") + .expectLines("2 x y") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysSupportsGetlineIntoSubarray() throws Exception { + AwkTestSupport + .awkTest("getline into subarray") + .script("BEGIN { getline a[1][2]; print a[1][2] }") + .stdin("hello\n") + .expect("hello\n") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysSupportsSubOnNestedElement() throws Exception { + AwkTestSupport + .awkTest("sub on nested array element") + .script("BEGIN { a[1][2] = \"abc\"; print sub(/b/, \"x\", a[1][2]), a[1][2] }") + .expectLines("1 axc") + .runAndAssert(); + } + + @Test + public void 
testArraysOfArraysSupportsFunctionArrayParameters() throws Exception { + AwkTestSupport + .awkTest("subarray passed as array parameter") + .script("function fill(arr) { arr[\"key\"] = \"value\" } BEGIN { fill(a[1]); print a[1][\"key\"] }") + .expectLines("value") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysSupportsDeletingPassedSubarrays() throws Exception { + AwkTestSupport + .awkTest("delete passed subarray") + .script( + "function f(c, b) { delete b; b[1] = 1; print c[1][1], b[1]; delete c[2] } BEGIN { a[1][1] = 11; a[1][2] = 12; a[2] = 2; delete a[1][1]; f(a, a[1]); print a[1][1]; print length(a), length(a[1]); delete a; print length(a), length(a[1]), length(a); a[1][1] = 11 }") + .expectLines("1 1", "1", "1 1", "0 0 1") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysRejectArrayAsScalarSubscript() throws Exception { + AwkTestSupport + .awkTest("array value cannot be used as scalar subscript") + .script( + "function f(arr, s) { delete arr[s[1]][1] } BEGIN { a[1][1] = 1; b[1][1] = 11; f(b, a) }") + .expectThrow(RuntimeException.class) + .runAndAssert(); + } + + @Test + public void testArraysOfArraysHandlesNestedLengthIterationAndMutation() throws Exception { + AwkTestSupport + .awkTest("gawk-style arrays of arrays flow") + .script( + "function f(x, i) { for (i = 1; i <= length(x); i++) print x[i]; x[1] = 1001 } BEGIN { a[1][1] = 10; a[1][2] = 20; a[1][3] = 30; a[2] = \"hello world! 
we have multi-dimensional array\"; a[3, \"X\"] = \"Y\"; print length(a), length(a[1]); delete a[2]; delete a[3, \"X\"]; a[2][1] = 100; a[2][2] = 200; a[2][3] = 300; for (i in a) { sum[i] = 0; for (j in a[i]) sum[i] += a[i][j] } print sum[1], sum[2]; f(a[1]); print a[1][1] }") + .expectLines("3 3", "60 600", "10", "20", "30", "1001") + .runAndAssert(); + } + @Test public void testGetlineDefaultVariable() throws Exception { String script = "BEGIN { while (getline && n++ < 2) print; exit }"; diff --git a/src/test/java/io/jawk/PosixConformanceTest.java b/src/test/java/io/jawk/PosixConformanceTest.java index 8baf5cee..9a49f8fb 100644 --- a/src/test/java/io/jawk/PosixConformanceTest.java +++ b/src/test/java/io/jawk/PosixConformanceTest.java @@ -556,7 +556,6 @@ public void posix84ForLoopClassic() throws Exception { @Test public void posix85ForInDeleteAll() throws Exception { - Assume.assumeTrue("length(array) is not supported", false); AwkTestSupport .awkTest("POSIX 8.5 for in delete all elements") .script("BEGIN{ split(\"a b c\", a, \" \" ); for (i in a) delete a[i]; print length(a) }") @@ -744,7 +743,6 @@ public void posix103LengthOnDollarZeroAndString() throws Exception { @Test public void posix104LengthOfArray() throws Exception { - Assume.assumeTrue("length(array) is not supported", false); AwkTestSupport .awkTest("POSIX 10.4 length of array") .script("BEGIN{ split(\"a b c\", A, \" \" ); print length(A) }") @@ -881,7 +879,6 @@ public void posix112InOperatorDoesNotCreateElements() throws Exception { @Test public void posix113DeleteArrayClearsAll() throws Exception { - Assume.assumeTrue("length(array) is not supported", false); AwkTestSupport .awkTest("POSIX 11.3 delete array clears elements") .script("BEGIN{ split(\"a b\", A, \" \" ); delete A; print length(A) }") From 7765bf17e8ccdf1b745a45c7707d920db739cfb0 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Thu, 16 Apr 2026 18:11:03 -0400 Subject: [PATCH 2/6] fix: address arrays-of-arrays review feedback --- 
src/main/java/io/jawk/frontend/AwkParser.java | 9 ++++--- .../java/io/jawk/intermediate/Opcode.java | 24 +++++++++---------- src/test/java/io/jawk/AwkParserTest.java | 1 + src/test/java/io/jawk/AwkTest.java | 10 ++++++++ 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/main/java/io/jawk/frontend/AwkParser.java b/src/main/java/io/jawk/frontend/AwkParser.java index c22a552e..5ba416b5 100644 --- a/src/main/java/io/jawk/frontend/AwkParser.java +++ b/src/main/java/io/jawk/frontend/AwkParser.java @@ -2069,6 +2069,9 @@ private void populateArrayOperandTuples( return; } if (arrayAst instanceof ArrayReferenceAst) { + if (!allowArraysOfArrays) { + arrayAst.throwSemanticException(errorMessage); + } ((ArrayReferenceAst) arrayAst).populateArrayValueTuples(tuples, createIfMissing); return; } @@ -2088,7 +2091,7 @@ private int populateActualParameters( params.getAst1(), tuples, true, - "Parameter position " + parameterIndex + " must be an array or subarray."); + "Parameter position " + (parameterIndex + 1) + " must be an array or subarray."); } else { params.getAst1().populateTuples(tuples); } @@ -4393,8 +4396,8 @@ public String toString() { @Override public int populateTuples(AwkTuples tuples) { pushSourceLineNumber(tuples); - // get the array var - getAst1().populateTuples(tuples); + // get the containing array, autovivifying missing parent subarrays + populateContainerTuples(tuples); // get the index getAst2().populateTuples(tuples); tuples.dereferenceArray(); diff --git a/src/main/java/io/jawk/intermediate/Opcode.java b/src/main/java/io/jawk/intermediate/Opcode.java index 7fa6b783..0a865c7a 100644 --- a/src/main/java/io/jawk/intermediate/Opcode.java +++ b/src/main/java/io/jawk/intermediate/Opcode.java @@ -221,7 +221,7 @@ public enum Opcode { * Assigns an item to an element of the associative array currently on the stack. * The item remains on the stack. *

- * Stack before: associative-array array-index item ...
+ * Stack before: array-index associative-array item ...
* Stack after: item ... */ ASSIGN_MAP_ELEMENT, @@ -405,7 +405,7 @@ public enum Opcode { * adjustment value; assigns the result to the array and pushes the result onto * the stack. *

- * Stack before: associative-array array-idx n ...
+ * Stack before: array-idx associative-array n ...
* Stack after: x+n ... */ PLUS_EQ_MAP_ELEMENT, @@ -414,7 +414,7 @@ public enum Opcode { * adjustment value; assigns the result to the array and pushes the result onto * the stack. *

- * Stack before: associative-array array-idx n ...
+ * Stack before: array-idx associative-array n ...
* Stack after: x-n ... */ MINUS_EQ_MAP_ELEMENT, @@ -423,7 +423,7 @@ public enum Opcode { * adjustment value; assigns the result to the array and pushes the result onto * the stack. *

- * Stack before: associative-array array-idx n ...
+ * Stack before: array-idx associative-array n ...
* Stack after: x*n ... */ MULT_EQ_MAP_ELEMENT, @@ -432,7 +432,7 @@ public enum Opcode { * adjustment value; assigns the result to the array and pushes the result onto * the stack. *

- * Stack before: associative-array array-idx n ...
+ * Stack before: array-idx associative-array n ...
* Stack after: x/n ... */ DIV_EQ_MAP_ELEMENT, @@ -441,7 +441,7 @@ public enum Opcode { * element by an adjustment value; assigns the result to the array and pushes the * result onto the stack. *

- * Stack before: associative-array array-idx n ...
+ * Stack before: array-idx associative-array n ...
* Stack after: x%n ... */ MOD_EQ_MAP_ELEMENT, @@ -450,7 +450,7 @@ public enum Opcode { * power of an adjustment value; assigns the result to the array and pushes the * result onto the stack. *

- * Stack before: associative-array array-idx n ...
+ * Stack before: array-idx associative-array n ...
* Stack after: x^n ... */ POW_EQ_MAP_ELEMENT, @@ -660,7 +660,7 @@ public enum Opcode { *

* Argument 1: is global sub *

- * Stack before: associative-array array-index regexp replacement-string orig-string ...
+ * Stack before: array-index associative-array orig-string replacement-string regexp ...
* Stack after: ... */ SUB_FOR_MAP_REFERENCE, @@ -832,14 +832,14 @@ public enum Opcode { /** * Increases the stack-provided array element reference by one. *

- * Stack before: associative-array array-idx ...
+ * Stack before: array-idx associative-array ...
* Stack after: x+1 ... */ INC_MAP_REF, /** * Decreases the stack-provided array element reference by one. *

- * Stack before: associative-array array-idx ...
+ * Stack before: array-idx associative-array ...
* Stack after: x-1 ... */ DEC_MAP_REF, @@ -920,7 +920,7 @@ public enum Opcode { * Dereferences an associative-array element as an array, creating a nested * array when the element is currently blank or uninitialized. *

- * Stack before: associative-array array-index ...
+ * Stack before: array-index associative-array ...
* Stack after: nested-associative-array ... */ ENSURE_ARRAY_ELEMENT, @@ -1245,7 +1245,7 @@ public enum Opcode { /** * Deletes an entry in a stack-provided associative array. *

- * Stack before: associative-array array-index
+ * Stack before: array-index associative-array
* Stack after: ... */ DELETE_MAP_ELEMENT, diff --git a/src/test/java/io/jawk/AwkParserTest.java b/src/test/java/io/jawk/AwkParserTest.java index ce7699e9..77f57286 100644 --- a/src/test/java/io/jawk/AwkParserTest.java +++ b/src/test/java/io/jawk/AwkParserTest.java @@ -200,6 +200,7 @@ public void testArraysOfArraysCanBeDisabled() { Awk awk = new Awk(settings); assertThrows(ParserException.class, () -> awk.compile("BEGIN { a[1][2] = 42 }")); + assertThrows(RuntimeException.class, () -> awk.compile("BEGIN { print ((\"x\" in a[1]) ? 1 : 0) }")); } @Test diff --git a/src/test/java/io/jawk/AwkTest.java b/src/test/java/io/jawk/AwkTest.java index 1c0fbb75..4b3429c7 100644 --- a/src/test/java/io/jawk/AwkTest.java +++ b/src/test/java/io/jawk/AwkTest.java @@ -821,6 +821,16 @@ public void testArraysOfArraysRejectScalarAsArray() throws Exception { .runAndAssert(); } + @Test + public void testArraysOfArraysReadAutovivifiesMissingParentSubarray() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays read autovivifies missing parent subarray") + .script( + "BEGIN { print \"[\" a[1][2] \"]\"; print (((1 in a) ? \"yes\" : \"no\") \" \" ((2 in a[1]) ? 
\"yes\" : \"no\")) }") + .expectLines("[]", "yes yes") + .runAndAssert(); + } + @Test public void testArraysOfArraysMembershipDeleteAndIteration() throws Exception { AwkSettings settings = new AwkSettings(); From 38072e9e2e6bcacd2d66e73789b143d785db5412 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Sat, 18 Apr 2026 09:58:09 -0700 Subject: [PATCH 3/6] fix: address remaining arrays-of-arrays review feedback --- src/main/java/io/jawk/backend/AVM.java | 10 +++++++ src/main/java/io/jawk/frontend/AwkParser.java | 2 ++ .../java/io/jawk/intermediate/AwkTuples.java | 7 +++++ .../java/io/jawk/intermediate/Opcode.java | 8 +++++ src/test/java/io/jawk/AwkTest.java | 29 ++++++++++++++++++- 5 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/main/java/io/jawk/backend/AVM.java b/src/main/java/io/jawk/backend/AVM.java index 838fe495..2fa93de0 100644 --- a/src/main/java/io/jawk/backend/AVM.java +++ b/src/main/java/io/jawk/backend/AVM.java @@ -709,6 +709,16 @@ private void executeTuples(PositionTracker position) position.next(); break; } + case BLANK_TO_ZERO: { + Object value = pop(); + if (value == null || value instanceof UninitializedObject) { + push(ZERO); + } else { + push(value); + } + position.next(); + break; + } case IFTRUE: { // arg[0] = address to jump to if top of stack is true // stack[0] = item to check diff --git a/src/main/java/io/jawk/frontend/AwkParser.java b/src/main/java/io/jawk/frontend/AwkParser.java index 5ba416b5..3fe68ffb 100644 --- a/src/main/java/io/jawk/frontend/AwkParser.java +++ b/src/main/java/io/jawk/frontend/AwkParser.java @@ -4704,6 +4704,7 @@ public int populateTuples(AwkTuples tuples) { } else { if (getAst1() instanceof ArrayReferenceAst) { ((ArrayReferenceAst) getAst1()).populateTargetValueTuples(tuples); + tuples.blankToZero(); } else { getAst1().populateTuples(tuples); } @@ -4741,6 +4742,7 @@ public int populateTuples(AwkTuples tuples) { pushSourceLineNumber(tuples); if (getAst1() instanceof ArrayReferenceAst) { 
((ArrayReferenceAst) getAst1()).populateTargetValueTuples(tuples); + tuples.blankToZero(); } else { getAst1().populateTuples(tuples); } diff --git a/src/main/java/io/jawk/intermediate/AwkTuples.java b/src/main/java/io/jawk/intermediate/AwkTuples.java index 016f70f6..4a3fbbf7 100644 --- a/src/main/java/io/jawk/intermediate/AwkTuples.java +++ b/src/main/java/io/jawk/intermediate/AwkTuples.java @@ -148,6 +148,13 @@ public void toNumber() { queue.add(new Tuple(Opcode.TO_NUMBER)); } + /** + * Replaces a blank or uninitialized top-of-stack value with numeric zero. + */ + public void blankToZero() { + queue.add(new Tuple(Opcode.BLANK_TO_ZERO)); + } + /** *

* ifTrue. diff --git a/src/main/java/io/jawk/intermediate/Opcode.java b/src/main/java/io/jawk/intermediate/Opcode.java index 0a865c7a..4f5eea2a 100644 --- a/src/main/java/io/jawk/intermediate/Opcode.java +++ b/src/main/java/io/jawk/intermediate/Opcode.java @@ -74,6 +74,14 @@ public enum Opcode { * Stack after: x (as a number) */ TO_NUMBER, + /** + * Replaces a blank or uninitialized top-of-stack value with numeric zero. + * Non-blank values remain unchanged. + *

+ * Stack before: x ...
+ * Stack after: x (or 0 when blank) ... + */ + BLANK_TO_ZERO, /** * Pops and evaluates the top-of-stack; if * true, it jumps to a specified address. diff --git a/src/test/java/io/jawk/AwkTest.java b/src/test/java/io/jawk/AwkTest.java index 4b3429c7..96299da4 100644 --- a/src/test/java/io/jawk/AwkTest.java +++ b/src/test/java/io/jawk/AwkTest.java @@ -49,6 +49,7 @@ import org.junit.Test; import io.jawk.backend.AVM; import io.jawk.frontend.ast.ParserException; +import io.jawk.jrt.AwkRuntimeException; import io.jawk.jrt.AppendableAwkSink; import io.jawk.jrt.AwkSink; import io.jawk.jrt.InputSource; @@ -821,6 +822,32 @@ public void testArraysOfArraysRejectScalarAsArray() throws Exception { .runAndAssert(); } + @Test + public void testArraysOfArraysReportLineNumberWhenScalarUsedAsArray() throws Exception { + AwkTestSupport.TestResult result = AwkTestSupport + .awkTest("arrays of arrays scalar used as array line number") + .script("BEGIN {\n a[1] = 5\n print a[1][2]\n}") + .expectThrow(AwkRuntimeException.class) + .run(); + result.assertExpected(); + AwkRuntimeException ex = (AwkRuntimeException) result.thrownException(); + assertEquals(3, ex.getLineNumber()); + assertTrue(ex.getMessage(), ex.getMessage().contains("Attempting to use a scalar as an array.")); + } + + @Test + public void testArraysOfArraysReportLineNumberWhenArrayUsedAsScalarSubscript() throws Exception { + AwkTestSupport.TestResult result = AwkTestSupport + .awkTest("arrays of arrays array used as scalar subscript line number") + .script("BEGIN {\n a[1][1] = 1\n b[1][1] = 2\n print b[a[1]]\n}") + .expectThrow(AwkRuntimeException.class) + .run(); + result.assertExpected(); + AwkRuntimeException ex = (AwkRuntimeException) result.thrownException(); + assertEquals(5, ex.getLineNumber()); + assertTrue(ex.getMessage(), ex.getMessage().contains("Attempting to use an array in a scalar context.")); + } + @Test public void testArraysOfArraysReadAutovivifiesMissingParentSubarray() throws Exception { AwkTestSupport @@ 
-850,7 +877,7 @@ public void testArraysOfArraysSupportsNestedIncrement() throws Exception { AwkTestSupport .awkTest("arrays of arrays nested increment") .script("BEGIN { old = a[1][2]++; print old, a[1][2]; ++a[1][2]; print a[1][2] }") - .expectLines(" 1", "2") + .expectLines("0 1", "2") .runAndAssert(); } From dfb0e8dbd9dc44dfa5e30207f88cf6077c52c678 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Sat, 18 Apr 2026 11:05:03 -0700 Subject: [PATCH 4/6] fix: preserve array reference source lines --- src/main/java/io/jawk/frontend/AwkParser.java | 60 +++++++++++++++++-- src/test/java/io/jawk/AwkTest.java | 53 ++++++++++------ 2 files changed, 89 insertions(+), 24 deletions(-) diff --git a/src/main/java/io/jawk/frontend/AwkParser.java b/src/main/java/io/jawk/frontend/AwkParser.java index 3fe68ffb..60a4e3ef 100644 --- a/src/main/java/io/jawk/frontend/AwkParser.java +++ b/src/main/java/io/jawk/frontend/AwkParser.java @@ -1553,18 +1553,20 @@ AST SYMBOL(boolean allowComparison, boolean allowInKeyword) throws IOException { } } if (token == Token.OPEN_BRACKET) { + int arrayReferenceLineNo = reader.getLineNumber() + 1; lexer(); AST idxAst = ARRAY_INDEX(true, allowInKeyword); lexer(Token.CLOSE_BRACKET); - AST arrayReference = symbolTable.addArrayReference(id, idxAst); + AST arrayReference = symbolTable.addArrayReference(id, idxAst, arrayReferenceLineNo); if (!allowArraysOfArrays && token == Token.OPEN_BRACKET) { throw parserException("Use [a,b,c,...] instead of [a][b][c]... 
for multi-dimensional arrays."); } while (allowArraysOfArrays && token == Token.OPEN_BRACKET) { + int nestedArrayReferenceLineNo = reader.getLineNumber() + 1; lexer(); idxAst = ARRAY_INDEX(true, allowInKeyword); lexer(Token.CLOSE_BRACKET); - arrayReference = new ArrayReferenceAst(arrayReference, idxAst); + arrayReference = new ArrayReferenceAst(nestedArrayReferenceLineNo, arrayReference, idxAst); } return arrayReference; } @@ -2126,7 +2128,7 @@ private Set collectArrayParameterIndexes(FunctionDefAst functionDefAst) private abstract class AST extends AstNode { private final String sourceDescription = scriptSources.get(scriptSourcesCurrentIndex).getDescription(); - private final int lineNo = reader.getLineNumber() + 1; + private final int lineNo; private AST parent; private AST ast1, ast2, ast3, ast4; private final EnumSet flags = EnumSet.noneOf(AstFlag.class); @@ -2211,9 +2213,20 @@ protected final AST searchFor(AstFlag flag) { return null; } - protected AST() {} + protected AST() { + this(reader.getLineNumber() + 1); + } + + protected AST(int lineNo) { + this.lineNo = lineNo; + } protected AST(AST ast1) { + this(reader.getLineNumber() + 1, ast1); + } + + protected AST(int lineNo, AST ast1) { + this(lineNo); this.ast1 = ast1; if (ast1 != null) { @@ -2222,6 +2235,11 @@ protected AST(AST ast1) { } protected AST(AST ast1, AST ast2) { + this(reader.getLineNumber() + 1, ast1, ast2); + } + + protected AST(int lineNo, AST ast1, AST ast2) { + this(lineNo); this.ast1 = ast1; this.ast2 = ast2; @@ -2234,6 +2252,11 @@ protected AST(AST ast1, AST ast2) { } protected AST(AST ast1, AST ast2, AST ast3) { + this(reader.getLineNumber() + 1, ast1, ast2, ast3); + } + + protected AST(int lineNo, AST ast1, AST ast2, AST ast3) { + this(lineNo); this.ast1 = ast1; this.ast2 = ast2; this.ast3 = ast3; @@ -2250,6 +2273,11 @@ protected AST(AST ast1, AST ast2, AST ast3) { } protected AST(AST ast1, AST ast2, AST ast3, AST ast4) { + this(reader.getLineNumber() + 1, ast1, ast2, ast3, ast4); + } + + 
protected AST(int lineNo, AST ast1, AST ast2, AST ast3, AST ast4) { + this(lineNo); this.ast1 = ast1; this.ast2 = ast2; this.ast3 = ast3; @@ -2475,18 +2503,34 @@ protected ScalarExpressionAst() { super(); } + protected ScalarExpressionAst(int lineNo) { + super(lineNo); + } + protected ScalarExpressionAst(AST a1) { super(a1); } + protected ScalarExpressionAst(int lineNo, AST a1) { + super(lineNo, a1); + } + protected ScalarExpressionAst(AST a1, AST a2) { super(a1, a2); } + protected ScalarExpressionAst(int lineNo, AST a1, AST a2) { + super(lineNo, a1, a2); + } + protected ScalarExpressionAst(AST a1, AST a2, AST a3) { super(a1, a2, a3); } + protected ScalarExpressionAst(int lineNo, AST a1, AST a2, AST a3) { + super(lineNo, a1, a2, a3); + } + @Override public boolean isArray() { return false; @@ -4388,6 +4432,10 @@ private ArrayReferenceAst(AST idAst, AST idxAst) { super(idAst, idxAst); } + private ArrayReferenceAst(int lineNo, AST idAst, AST idxAst) { + super(lineNo, idAst, idxAst); + } + @Override public String toString() { return super.toString() + " (" + getAst1() + " [...])"; @@ -5376,8 +5424,8 @@ AST addFunctionCall(String id, AST paramList) { return new FunctionCallAst(functionProxy, paramList); } - AST addArrayReference(String id, AST idxAst) throws ParserException { - return new ArrayReferenceAst(addArrayID(id), idxAst); + AST addArrayReference(String id, AST idxAst, int lineNo) throws ParserException { + return new ArrayReferenceAst(lineNo, addArrayID(id), idxAst); } // constants are no longer cached/hashed so that individual ASTs diff --git a/src/test/java/io/jawk/AwkTest.java b/src/test/java/io/jawk/AwkTest.java index 96299da4..5804ab74 100644 --- a/src/test/java/io/jawk/AwkTest.java +++ b/src/test/java/io/jawk/AwkTest.java @@ -824,28 +824,28 @@ public void testArraysOfArraysRejectScalarAsArray() throws Exception { @Test public void testArraysOfArraysReportLineNumberWhenScalarUsedAsArray() throws Exception { - AwkTestSupport.TestResult result = 
AwkTestSupport - .awkTest("arrays of arrays scalar used as array line number") - .script("BEGIN {\n a[1] = 5\n print a[1][2]\n}") - .expectThrow(AwkRuntimeException.class) - .run(); - result.assertExpected(); - AwkRuntimeException ex = (AwkRuntimeException) result.thrownException(); - assertEquals(3, ex.getLineNumber()); - assertTrue(ex.getMessage(), ex.getMessage().contains("Attempting to use a scalar as an array.")); + assertRuntimeExceptionLineNumber( + "arrays of arrays scalar used as array line number", + 3, + "Attempting to use a scalar as an array.", + "BEGIN {", // 1 + " a[1] = 5", // 2 + " print a[1][2]", // 3 + "}"); // 4 } @Test public void testArraysOfArraysReportLineNumberWhenArrayUsedAsScalarSubscript() throws Exception { - AwkTestSupport.TestResult result = AwkTestSupport - .awkTest("arrays of arrays array used as scalar subscript line number") - .script("BEGIN {\n a[1][1] = 1\n b[1][1] = 2\n print b[a[1]]\n}") - .expectThrow(AwkRuntimeException.class) - .run(); - result.assertExpected(); - AwkRuntimeException ex = (AwkRuntimeException) result.thrownException(); - assertEquals(5, ex.getLineNumber()); - assertTrue(ex.getMessage(), ex.getMessage().contains("Attempting to use an array in a scalar context.")); + assertRuntimeExceptionLineNumber( + "arrays of arrays array used as scalar subscript line number", + 5, + "Attempting to use an array in a scalar context.", + "BEGIN {", // 1 + " a[1][1] = 1", // 2 + "", // 3 + " b[1][1] = 2", // 4 + " print b[a[1]]", // 5 + "}"); // 6 } @Test @@ -959,6 +959,23 @@ public void testGetlineDefaultVariable() throws Exception { .runAndAssert(); } + private void assertRuntimeExceptionLineNumber( + String description, + int expectedLineNumber, + String expectedMessage, + String... 
scriptLines) + throws Exception { + AwkTestSupport.TestResult result = AwkTestSupport + .awkTest(description) + .script(String.join("\n", scriptLines)) + .expectThrow(AwkRuntimeException.class) + .run(); + result.assertExpected(); + AwkRuntimeException ex = (AwkRuntimeException) result.thrownException(); + assertEquals(expectedLineNumber, ex.getLineNumber()); + assertTrue(ex.getMessage(), ex.getMessage().contains(expectedMessage)); + } + @Test public void testEvalNumericExpression() throws Exception { Object result = AWK.eval("1 + 2", (String) null); From 18dc95d1a0c929b59bc0211b99395724f8642eef Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Sat, 18 Apr 2026 11:13:38 -0700 Subject: [PATCH 5/6] refactor: clarify parser source line capture --- src/main/java/io/jawk/frontend/AwkParser.java | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/main/java/io/jawk/frontend/AwkParser.java b/src/main/java/io/jawk/frontend/AwkParser.java index 60a4e3ef..cb5ecdca 100644 --- a/src/main/java/io/jawk/frontend/AwkParser.java +++ b/src/main/java/io/jawk/frontend/AwkParser.java @@ -375,6 +375,16 @@ private LexerException lexerException(String msg) { reader.getLineNumber()); } + /** + * Returns the current 1-based source line number to stamp onto AST nodes that + * will later emit tuple line markers for runtime error reporting. + * + * @return current source line number using 1-based counting + */ + private int currentSourceLineNumber() { + return reader.getLineNumber() + 1; + } + /** * Reads the string and handle all escape codes. 
* @@ -1553,7 +1563,7 @@ AST SYMBOL(boolean allowComparison, boolean allowInKeyword) throws IOException { } } if (token == Token.OPEN_BRACKET) { - int arrayReferenceLineNo = reader.getLineNumber() + 1; + int arrayReferenceLineNo = currentSourceLineNumber(); lexer(); AST idxAst = ARRAY_INDEX(true, allowInKeyword); lexer(Token.CLOSE_BRACKET); @@ -1562,7 +1572,7 @@ AST SYMBOL(boolean allowComparison, boolean allowInKeyword) throws IOException { throw parserException("Use [a,b,c,...] instead of [a][b][c]... for multi-dimensional arrays."); } while (allowArraysOfArrays && token == Token.OPEN_BRACKET) { - int nestedArrayReferenceLineNo = reader.getLineNumber() + 1; + int nestedArrayReferenceLineNo = currentSourceLineNumber(); lexer(); idxAst = ARRAY_INDEX(true, allowInKeyword); lexer(Token.CLOSE_BRACKET); @@ -2128,6 +2138,8 @@ private Set collectArrayParameterIndexes(FunctionDefAst functionDefAst) private abstract class AST extends AstNode { private final String sourceDescription = scriptSources.get(scriptSourcesCurrentIndex).getDescription(); + // PositionTracker consumes these tuple-emitted source lines at runtime, but + // AST nodes have to capture them here during parsing before tuples exist. 
private final int lineNo; private AST parent; private AST ast1, ast2, ast3, ast4; @@ -2214,7 +2226,7 @@ protected final AST searchFor(AstFlag flag) { } protected AST() { - this(reader.getLineNumber() + 1); + this(currentSourceLineNumber()); } protected AST(int lineNo) { @@ -2222,7 +2234,7 @@ protected AST(int lineNo) { } protected AST(AST ast1) { - this(reader.getLineNumber() + 1, ast1); + this(currentSourceLineNumber(), ast1); } protected AST(int lineNo, AST ast1) { @@ -2235,7 +2247,7 @@ protected AST(int lineNo, AST ast1) { } protected AST(AST ast1, AST ast2) { - this(reader.getLineNumber() + 1, ast1, ast2); + this(currentSourceLineNumber(), ast1, ast2); } protected AST(int lineNo, AST ast1, AST ast2) { @@ -2252,7 +2264,7 @@ protected AST(int lineNo, AST ast1, AST ast2) { } protected AST(AST ast1, AST ast2, AST ast3) { - this(reader.getLineNumber() + 1, ast1, ast2, ast3); + this(currentSourceLineNumber(), ast1, ast2, ast3); } protected AST(int lineNo, AST ast1, AST ast2, AST ast3) { @@ -2273,7 +2285,7 @@ protected AST(int lineNo, AST ast1, AST ast2, AST ast3) { } protected AST(AST ast1, AST ast2, AST ast3, AST ast4) { - this(reader.getLineNumber() + 1, ast1, ast2, ast3, ast4); + this(currentSourceLineNumber(), ast1, ast2, ast3, ast4); } protected AST(int lineNo, AST ast1, AST ast2, AST ast3, AST ast4) { From 44c42da042bc3badc79dc8dab75f573a21adb4d8 Mon Sep 17 00:00:00 2001 From: Bertrand Martin Date: Sat, 18 Apr 2026 12:02:48 -0700 Subject: [PATCH 6/6] fix: simplify arrays-of-arrays review fixes --- src/main/java/io/jawk/backend/AVM.java | 35 ++++++++++++------ src/main/java/io/jawk/frontend/AwkParser.java | 10 +++--- .../java/io/jawk/intermediate/AwkTuples.java | 15 ++++---- .../java/io/jawk/intermediate/Opcode.java | 36 ++++++++++--------- src/main/java/io/jawk/util/AwkSettings.java | 11 +++--- src/site/markdown/compatibility.md | 1 + src/site/markdown/java-compile.md | 2 +- src/site/markdown/java.md | 4 +-- src/test/java/io/jawk/AwkParserTest.java | 1 + 
src/test/java/io/jawk/AwkTest.java | 28 +++++++++++++++ 10 files changed, 97 insertions(+), 46 deletions(-) diff --git a/src/main/java/io/jawk/backend/AVM.java b/src/main/java/io/jawk/backend/AVM.java index 2fa93de0..f9538311 100644 --- a/src/main/java/io/jawk/backend/AVM.java +++ b/src/main/java/io/jawk/backend/AVM.java @@ -52,6 +52,7 @@ import io.jawk.intermediate.Opcode; import io.jawk.intermediate.PositionTracker; import io.jawk.intermediate.UninitializedObject; +import io.jawk.jrt.AssocArray; import io.jawk.jrt.AwkRuntimeException; import io.jawk.jrt.AwkSink; import io.jawk.jrt.BlockManager; @@ -709,16 +710,6 @@ private void executeTuples(PositionTracker position) position.next(); break; } - case BLANK_TO_ZERO: { - Object value = pop(); - if (value == null || value instanceof UninitializedObject) { - push(ZERO); - } else { - push(value); - } - position.next(); - break; - } case IFTRUE: { // arg[0] = address to jump to if top of stack is true // stack[0] = item to check @@ -1238,6 +1229,20 @@ private void executeTuples(PositionTracker position) position.next(); break; } + case PEEK_ARRAY_ELEMENT: { + // stack[0] = array index + Object idx = pop(); + checkScalar(idx); + Map map = toMap(pop()); + if (map instanceof AssocArray && !JRT.containsAwkKey(map, idx)) { + push(BLANK); + } else { + Object value = map.get(idx); + push(value != null ? 
value : BLANK); + } + position.next(); + break; + } case SRAND: { // arg[0] = numArgs (where 0 = no args, anything else = one argument) // stack[0] = seed (only if numArgs != 0) @@ -1719,6 +1724,11 @@ private void executeTuples(PositionTracker position) } case KEYLIST: { Object o = pop(); + if (o == null || o instanceof UninitializedObject) { + push(new ArrayDeque<>()); + position.next(); + break; + } if (!(o instanceof Map)) { throw new AwkRuntimeException( position.lineNumber(), @@ -2109,6 +2119,11 @@ private void executeTuples(PositionTracker position) Object arr = pop(); Object arg = pop(); checkScalar(arg); + if (arr == null || arr instanceof UninitializedObject) { + push(ZERO); + position.next(); + break; + } if (!(arr instanceof Map)) { throw new AwkRuntimeException("Attempting to test membership on a non-associative-array."); } diff --git a/src/main/java/io/jawk/frontend/AwkParser.java b/src/main/java/io/jawk/frontend/AwkParser.java index cb5ecdca..055ed975 100644 --- a/src/main/java/io/jawk/frontend/AwkParser.java +++ b/src/main/java/io/jawk/frontend/AwkParser.java @@ -3123,7 +3123,7 @@ public int populateTuples(AwkTuples tuples) { breakAddress = tuples.createAddress("breakAddress"); - populateArrayOperandTuples(getAst2(), tuples, true, getAst2() + " is not an array"); + populateArrayOperandTuples(getAst2(), tuples, false, getAst2() + " is not an array"); // pops the array and pushes the keyset tuples.keylist(); @@ -3468,7 +3468,7 @@ public int populateTuples(AwkTuples tuples) { } getAst1().populateTuples(tuples); - populateArrayOperandTuples(getAst2(), tuples, true, "Expecting an array for rhs of IN. Got a scalar."); + populateArrayOperandTuples(getAst2(), tuples, false, "Expecting an array for rhs of IN. 
Got a scalar."); tuples.isIn(); popSourceLineNumber(tuples); @@ -4479,7 +4479,7 @@ private void populateArrayValueTuples(AwkTuples tuples, boolean createIfMissing) if (createIfMissing) { tuples.ensureArrayElement(); } else { - tuples.dereferenceArray(); + tuples.peekArrayElement(); } popSourceLineNumber(tuples); } @@ -4764,7 +4764,7 @@ public int populateTuples(AwkTuples tuples) { } else { if (getAst1() instanceof ArrayReferenceAst) { ((ArrayReferenceAst) getAst1()).populateTargetValueTuples(tuples); - tuples.blankToZero(); + tuples.unaryPlus(); } else { getAst1().populateTuples(tuples); } @@ -4802,7 +4802,7 @@ public int populateTuples(AwkTuples tuples) { pushSourceLineNumber(tuples); if (getAst1() instanceof ArrayReferenceAst) { ((ArrayReferenceAst) getAst1()).populateTargetValueTuples(tuples); - tuples.blankToZero(); + tuples.unaryPlus(); } else { getAst1().populateTuples(tuples); } diff --git a/src/main/java/io/jawk/intermediate/AwkTuples.java b/src/main/java/io/jawk/intermediate/AwkTuples.java index 4a3fbbf7..4fbddd63 100644 --- a/src/main/java/io/jawk/intermediate/AwkTuples.java +++ b/src/main/java/io/jawk/intermediate/AwkTuples.java @@ -148,13 +148,6 @@ public void toNumber() { queue.add(new Tuple(Opcode.TO_NUMBER)); } - /** - * Replaces a blank or uninitialized top-of-stack value with numeric zero. - */ - public void blankToZero() { - queue.add(new Tuple(Opcode.BLANK_TO_ZERO)); - } - /** *

* ifTrue. @@ -1078,6 +1071,14 @@ public void dereferenceArray() { queue.add(new Tuple(Opcode.DEREF_ARRAY)); } + /** + * Looks up an associative-array element without creating a blank entry when + * the key is missing. + */ + public void peekArrayElement() { + queue.add(new Tuple(Opcode.PEEK_ARRAY_ELEMENT)); + } + /** * Dereferences an associative-array element as a nested array, creating it if * needed. diff --git a/src/main/java/io/jawk/intermediate/Opcode.java b/src/main/java/io/jawk/intermediate/Opcode.java index 4f5eea2a..c6133b86 100644 --- a/src/main/java/io/jawk/intermediate/Opcode.java +++ b/src/main/java/io/jawk/intermediate/Opcode.java @@ -74,14 +74,6 @@ public enum Opcode { * Stack after: x (as a number) */ TO_NUMBER, - /** - * Replaces a blank or uninitialized top-of-stack value with numeric zero. - * Non-blank values remain unchanged. - *

- * Stack before: x ...
- * Stack after: x (or 0 when blank) ... - */ - BLANK_TO_ZERO, /** * Pops and evaluates the top-of-stack; if * true, it jumps to a specified address. @@ -924,14 +916,6 @@ public enum Opcode { /** Constant DEREF_ARRAY=336 */ DEREF_ARRAY, - /** - * Dereferences an associative-array element as an array, creating a nested - * array when the element is currently blank or uninitialized. - *

- * Stack before: array-index associative-array ...
- * Stack after: nested-associative-array ... - */ - ENSURE_ARRAY_ELEMENT, // for (x in y) {keyset} support /** @@ -1451,7 +1435,25 @@ public enum Opcode { * Stack before: ...
* Stack after: x ... or 0 if uninitialized */ - POSTDEC; + POSTDEC, + + /** + * Dereferences an associative-array element as an array, creating a nested + * array when the element is currently blank or uninitialized. + *

+ * Stack before: array-index associative-array ...
+ * Stack after: nested-associative-array ... + */ + ENSURE_ARRAY_ELEMENT, + + /** + * Looks up an associative-array element without creating a blank entry when + * the key is missing. + *

+ * Stack before: array-index associative-array ...
+ * Stack after: item ... + */ + PEEK_ARRAY_ELEMENT; private static final Opcode[] VALUES = values(); diff --git a/src/main/java/io/jawk/util/AwkSettings.java b/src/main/java/io/jawk/util/AwkSettings.java index 2d5d9047..cafb127d 100644 --- a/src/main/java/io/jawk/util/AwkSettings.java +++ b/src/main/java/io/jawk/util/AwkSettings.java @@ -85,7 +85,8 @@ public class AwkSettings { private volatile boolean useSortedArrayKeys = false; /** - * Whether to accept gawk-style arrays of arrays syntax such as {@code a[i][j]}. + * Whether to accept gawk-style arrays of arrays syntax such as {@code a[i][j]} + * and subarray operands in array-only positions such as {@code split(..., a[i])}. * true by default. */ private volatile boolean allowArraysOfArrays = true; @@ -278,7 +279,8 @@ public void setUseSortedArrayKeys(boolean useSortedArrayKeys) { } /** - * Whether to accept gawk-style arrays of arrays syntax such as {@code a[i][j]}. + * Whether to accept gawk-style arrays of arrays syntax such as {@code a[i][j]} + * and subarray operands in array-only positions such as {@code split(..., a[i])}. * * @return {@code true} when arrays of arrays are enabled at compile time */ @@ -288,9 +290,10 @@ public boolean isAllowArraysOfArrays() { /** * Enables or disables gawk-style arrays of arrays syntax such as - * {@code a[i][j]}. + * {@code a[i][j]} and subarray operands in array-only positions such as + * {@code split(..., a[i])} or {@code for (k in a[i])}. 
* - * @param allowArraysOfArrays {@code true} to accept arrays-of-arrays syntax + * @param allowArraysOfArrays {@code true} to accept arrays-of-arrays features */ public void setAllowArraysOfArrays(boolean allowArraysOfArrays) { this.allowArraysOfArrays = allowArraysOfArrays; diff --git a/src/site/markdown/compatibility.md b/src/site/markdown/compatibility.md index 900d6771..0008f25f 100644 --- a/src/site/markdown/compatibility.md +++ b/src/site/markdown/compatibility.md @@ -15,6 +15,7 @@ Jawk keeps the AWK language model while adding JVM-oriented capabilities: - it exposes a serializable tuple representation for compilation and reuse - it can maintain associative array keys in sorted order - it supports gawk-style arrays of arrays (`a[i][j]`) in addition to classic AWK multi-dimensional subscripts (`a[i, j]`) + This compile-time feature can be disabled, in which case Jawk also rejects subarray operands in array-only positions such as `split(..., a[i])` or `for (k in a[i])`. - it supports explicit extensions - it offers a sandboxed compiler and runtime diff --git a/src/site/markdown/java-compile.md b/src/site/markdown/java-compile.md index 6d55237e..7a143282 100644 --- a/src/site/markdown/java-compile.md +++ b/src/site/markdown/java-compile.md @@ -66,7 +66,7 @@ awk.script(program) This keeps compilation and execution separate, which is useful when the same AWK program is reused across multiple inputs. -Compilation settings matter here. For example, gawk-style arrays of arrays (`a[i][j]`) are accepted by default, but you can disable that syntax before compiling: +Compilation settings matter here. For example, gawk-style arrays of arrays (`a[i][j]`) are accepted by default, but you can disable that compile-time mode before compiling. 
When disabled, Jawk also rejects subarray operands in array-only positions such as `split(..., a[i])` or `for (k in a[i])`: ```java AwkSettings settings = new AwkSettings(); diff --git a/src/site/markdown/java.md b/src/site/markdown/java.md index be12a84d..ede4f20b 100644 --- a/src/site/markdown/java.md +++ b/src/site/markdown/java.md @@ -36,14 +36,14 @@ Awk awk = new Awk(settings); | `setLocale(Locale)` | `Locale.US` | Locale for numeric output formatting | | `setDefaultRS(String)` | Platform line separator | Default value for `RS`, the record separator | | `setUseSortedArrayKeys(boolean)` | `false` | Whether to keep associative array keys in sorted order | -| `setAllowArraysOfArrays(boolean)` | `true` | Whether the compiler accepts gawk-style nested array syntax such as `a[i][j]` | +| `setAllowArraysOfArrays(boolean)` | `true` | Whether the compiler accepts gawk-style nested array features such as `a[i][j]` and `split(..., a[i])` | | `putVariable(String, Object)` | Empty map | Pre-set variables available before `BEGIN` | Output destination is specified per-call on the builder (`execute()`, `execute(PrintStream)`, `execute(OutputStream)`, `execute(Appendable)`, or `execute(AwkSink)`). See the [Custom Output](java-output.html) guide for details. For more on passing variables to scripts, see [Variables and Arguments](java-variables.html). -By default, Jawk accepts both classic multi-dimensional array syntax (`a[i, j]`) and gawk-style arrays of arrays (`a[i][j]`). Disable the gawk-style parser mode when you need strict classic AWK parsing: +By default, Jawk accepts both classic multi-dimensional array syntax (`a[i, j]`) and gawk-style arrays of arrays (`a[i][j]`). 
Disable this compile-time mode when you need strict classic AWK parsing; doing so also rejects subarray operands in array-only positions such as `split(..., a[i])`, `for (k in a[i])`, and `"x" in a[i]`: ```java AwkSettings settings = new AwkSettings(); diff --git a/src/test/java/io/jawk/AwkParserTest.java b/src/test/java/io/jawk/AwkParserTest.java index 77f57286..6df5e99d 100644 --- a/src/test/java/io/jawk/AwkParserTest.java +++ b/src/test/java/io/jawk/AwkParserTest.java @@ -201,6 +201,7 @@ public void testArraysOfArraysCanBeDisabled() { assertThrows(ParserException.class, () -> awk.compile("BEGIN { a[1][2] = 42 }")); assertThrows(RuntimeException.class, () -> awk.compile("BEGIN { print ((\"x\" in a[1]) ? 1 : 0) }")); + assertThrows(RuntimeException.class, () -> awk.compile("BEGIN { for (k in a[1]) print k }")); } @Test diff --git a/src/test/java/io/jawk/AwkTest.java b/src/test/java/io/jawk/AwkTest.java index 5804ab74..55d85c8c 100644 --- a/src/test/java/io/jawk/AwkTest.java +++ b/src/test/java/io/jawk/AwkTest.java @@ -872,6 +872,24 @@ public void testArraysOfArraysMembershipDeleteAndIteration() throws Exception { .runAndAssert(); } + @Test + public void testArraysOfArraysInDoesNotAutovivifyMissingSubarray() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays in does not autovivify missing subarray") + .script("BEGIN { print ((\"x\" in a[1]) ? \"yes\" : \"no\"); print ((1 in a) ? \"yes\" : \"no\") }") + .expectLines("no", "no") + .runAndAssert(); + } + + @Test + public void testArraysOfArraysForInDoesNotAutovivifyMissingSubarray() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays for-in does not autovivify missing subarray") + .script("BEGIN { for (k in a[1]) print k; print ((1 in a) ? 
\"yes\" : \"no\") }") + .expectLines("no") + .runAndAssert(); + } + @Test public void testArraysOfArraysSupportsNestedIncrement() throws Exception { AwkTestSupport @@ -881,6 +899,16 @@ public void testArraysOfArraysSupportsNestedIncrement() throws Exception { .runAndAssert(); } + @Test + public void testArraysOfArraysPostfixOperatorsUseNumericOldValue() throws Exception { + AwkTestSupport + .awkTest("arrays of arrays postfix operators use numeric old value") + .script( + "BEGIN { a[1][1] = \"abc\"; oldInc = a[1][1]++; a[1][2] = \"abc\"; oldDec = a[1][2]--; print oldInc, a[1][1]; print oldDec, a[1][2] }") + .expectLines("0 1", "0 -1") + .runAndAssert(); + } + @Test public void testArraysOfArraysSupportsSplitIntoSubarray() throws Exception { AwkTestSupport