From 8881468f8b8d6e3d64f44fa8382c56034cf38c42 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 13 Feb 2026 23:27:27 +0100 Subject: [PATCH 1/4] Implement interpreter features: POW_SCALAR, list assignment fixes, ARRAY_GET/ARRAY_SIZE enhancements - Add POW_SCALAR (22) opcode for exponentiation operator (**) - Fix ARRAY_SIZE to return count for RuntimeList instead of last element - Fix ARRAY_GET to handle both RuntimeArray and RuntimeList - Fix SLOWOP_LIST_SLICE_FROM to read correct number of shorts (2 instead of 4) - Add disassembler support for SCALAR_TO_LIST, LIST_TO_SCALAR, POW_SCALAR, and SLOWOP_LIST_SLICE_FROM - Improve interpreter error formatting to show PC and token index Progress: 3 subtests of demo.t now pass completely (26 tests passing) Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeCompiler.java | 841 ++++++++++++++++-- .../interpreter/BytecodeInterpreter.java | 176 +++- .../interpreter/InterpretedCode.java | 49 + .../org/perlonjava/interpreter/Opcodes.java | 18 + .../interpreter/SlowOpcodeHandler.java | 92 ++ 5 files changed, 1084 insertions(+), 92 deletions(-) diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java index 801c17b38..40db99d35 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java @@ -1029,6 +1029,95 @@ public void visit(BinaryOperatorNode node) { emitReg(valueReg); lastResultReg = hashReg; + } else if (leftOp.operator.equals("our")) { + // Assignment to our variable: our $x = value or our @x = value or our %x = value + // Compile the our declaration first (which loads the global into a register) + leftOp.accept(this); + int targetReg = lastResultReg; + + // Now assign the RHS value to the target register + // The target register contains either a scalar, array, or hash + // We need to determine which and use the appropriate assignment + + // Extract the sigil from our operand + if (leftOp.operand instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) leftOp.operand; + String sigil = sigilOp.operator; + + if (sigil.equals("$")) { + // Scalar: use MOVE + emit(Opcodes.MOVE); + emitReg(targetReg); + emitReg(valueReg); + } else if (sigil.equals("@")) { + // Array: use ARRAY_SET_FROM_LIST + emit(Opcodes.ARRAY_SET_FROM_LIST); + emitReg(targetReg); + emitReg(valueReg); + } else if (sigil.equals("%")) { + // Hash: use HASH_SET_FROM_LIST + emit(Opcodes.HASH_SET_FROM_LIST); + emitReg(targetReg); + emitReg(valueReg); + } + } else if (leftOp.operand instanceof ListNode) { + // our ($a, $b) = ... - list declaration with assignment + // The our statement already declared the variables and returned a list + // We need to assign the RHS values to each variable + ListNode listNode = (ListNode) leftOp.operand; + + // Convert RHS to list + int rhsListReg = allocateRegister(); + emit(Opcodes.SCALAR_TO_LIST); + emitReg(rhsListReg); + emitReg(valueReg); + + // Assign each element + for (int i = 0; i < listNode.elements.size(); i++) { + Node element = listNode.elements.get(i); + if (element instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) element; + String sigil = sigilOp.operator; + + if (sigilOp.operand instanceof IdentifierNode) { + String varName = sigil + ((IdentifierNode) sigilOp.operand).name; + int varReg = getVariableRegister(varName); + + // Get i-th element from RHS + int indexReg = allocateRegister(); + emit(Opcodes.LOAD_INT); + emitReg(indexReg); + emitInt(i); + + int elemReg = allocateRegister(); + emit(Opcodes.ARRAY_GET); + emitReg(elemReg); + emitReg(rhsListReg); + emitReg(indexReg); + + // Assign to variable + if (sigil.equals("$")) { + emit(Opcodes.MOVE); + emitReg(varReg); + emitReg(elemReg); + } else if (sigil.equals("@")) { + emit(Opcodes.ARRAY_SET_FROM_LIST); + emitReg(varReg); + emitReg(elemReg); + } else if (sigil.equals("%")) { + emit(Opcodes.HASH_SET_FROM_LIST); + emitReg(varReg); + emitReg(elemReg); + } + } + } + } + lastResultReg = valueReg; + currentCallContext = savedContext; + return; + } + + lastResultReg = targetReg; } else { throw new RuntimeException("Assignment to unsupported operator: " + leftOp.operator); } @@ -1374,6 +1463,156 @@ public void visit(BinaryOperatorNode node) { } throwCompilerException("Assignment to non-identifier not yet supported: " + node.left.getClass().getSimpleName()); + } else if (node.left instanceof ListNode) { + // List assignment: ($a, $b) = ... or () = ... + // In scalar context, returns the number of elements on RHS + // In list context, returns the RHS list + ListNode listNode = (ListNode) node.left; + + // Compile RHS in LIST context to get all elements + int savedRhsContext = currentCallContext; + currentCallContext = RuntimeContextType.LIST; + node.right.accept(this); + int rhsReg = lastResultReg; + currentCallContext = savedRhsContext; + + // Convert RHS to RuntimeList if needed + int rhsListReg = allocateRegister(); + emit(Opcodes.SCALAR_TO_LIST); + emitReg(rhsListReg); + emitReg(rhsReg); + + // If the list is not empty, perform the assignment + if (!listNode.elements.isEmpty()) { + // Assign each RHS element to corresponding LHS variable + for (int i = 0; i < listNode.elements.size(); i++) { + Node lhsElement = listNode.elements.get(i); + + // Get the i-th element from RHS list + int indexReg = allocateRegister(); + emit(Opcodes.LOAD_INT); + emitReg(indexReg); + emitInt(i); + + int elementReg = allocateRegister(); + emit(Opcodes.ARRAY_GET); + emitReg(elementReg); + emitReg(rhsListReg); + emitReg(indexReg); + + // Assign to LHS element + if (lhsElement instanceof OperatorNode) { + OperatorNode lhsOp = (OperatorNode) lhsElement; + if (lhsOp.operator.equals("$") && lhsOp.operand instanceof IdentifierNode) { + String varName = "$" + ((IdentifierNode) lhsOp.operand).name; + + if (hasVariable(varName)) { + int targetReg = getVariableRegister(varName); + if (capturedVarIndices != null && capturedVarIndices.containsKey(varName)) { + emit(Opcodes.SET_SCALAR); + emitReg(targetReg); + emitReg(elementReg); + } else { + emit(Opcodes.MOVE); + emitReg(targetReg); + emitReg(elementReg); + } + } else { + int nameIdx = addToStringPool(varName); + emit(Opcodes.STORE_GLOBAL_SCALAR); + emit(nameIdx); + emitReg(elementReg); + } + } else if (lhsOp.operator.equals("@") && lhsOp.operand instanceof IdentifierNode) { + // Array slurp: ($a, @rest) = ... + // Collect remaining elements into a RuntimeList + String varName = "@" + ((IdentifierNode) lhsOp.operand).name; + + int arrayReg; + if (hasVariable(varName)) { + arrayReg = getVariableRegister(varName); + } else { + arrayReg = allocateRegister(); + String globalArrayName = NameNormalizer.normalizeVariableName( + ((IdentifierNode) lhsOp.operand).name, + getCurrentPackage() + ); + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + + // Create a list of remaining indices + // Use SLOWOP_LIST_SLICE_FROM to get list[i..] + int remainingListReg = allocateRegister(); + emit(Opcodes.SLOW_OP); + emit(Opcodes.SLOWOP_LIST_SLICE_FROM); + emitReg(remainingListReg); + emitReg(rhsListReg); + emitInt(i); // Start index + + // Populate array from remaining elements + emit(Opcodes.ARRAY_SET_FROM_LIST); + emitReg(arrayReg); + emitReg(remainingListReg); + + // Array slurp consumes all remaining elements + break; + } else if (lhsOp.operator.equals("%") && lhsOp.operand instanceof IdentifierNode) { + // Hash slurp: ($a, %rest) = ... + String varName = "%" + ((IdentifierNode) lhsOp.operand).name; + + int hashReg; + if (hasVariable(varName)) { + hashReg = getVariableRegister(varName); + } else { + hashReg = allocateRegister(); + String globalHashName = NameNormalizer.normalizeVariableName( + ((IdentifierNode) lhsOp.operand).name, + getCurrentPackage() + ); + int nameIdx = addToStringPool(globalHashName); + emit(Opcodes.LOAD_GLOBAL_HASH); + emitReg(hashReg); + emit(nameIdx); + } + + // Get remaining elements from list + int remainingListReg = allocateRegister(); + emit(Opcodes.SLOW_OP); + emit(Opcodes.SLOWOP_LIST_SLICE_FROM); + emitReg(remainingListReg); + emitReg(rhsListReg); + emitInt(i); // Start index + + // Populate hash from remaining elements + emit(Opcodes.HASH_SET_FROM_LIST); + emitReg(hashReg); + emitReg(remainingListReg); + + // Hash slurp consumes all remaining elements + break; + } + } + } + } + + // Return value depends on savedContext (the context this assignment was called in) + if (savedContext == RuntimeContextType.SCALAR) { + // In scalar context, list assignment returns the count of RHS elements + int countReg = allocateRegister(); + emit(Opcodes.ARRAY_SIZE); + emitReg(countReg); + emitReg(rhsListReg); + lastResultReg = countReg; + } else { + // In list context, return the RHS value + lastResultReg = rhsListReg; + } + + currentCallContext = savedContext; + return; } else { throwCompilerException("Assignment to non-identifier not yet supported: " + node.left.getClass().getSimpleName()); } @@ -1529,6 +1768,12 @@ public void visit(BinaryOperatorNode node) { emitReg(rs1); emitReg(rs2); } + case "**" -> { + emit(Opcodes.POW_SCALAR); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } case "." -> { emit(Opcodes.CONCAT); emitReg(rd); @@ -1547,6 +1792,30 @@ public void visit(BinaryOperatorNode node) { emitReg(rs1); emitReg(rs2); } + case "cmp" -> { + emit(Opcodes.COMPARE_STR); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } + case "bless" -> { + // bless $ref, "Package" or bless $ref (defaults to current package) + // rs1 = reference to bless + // rs2 = package name (or undef for current package) + emit(Opcodes.BLESS); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } + case "isa" -> { + // $obj isa "Package" - check if object is instance of package + // rs1 = object/reference + // rs2 = package name + emit(Opcodes.ISA); + emitReg(rd); + emitReg(rs1); + emitReg(rs2); + } case "==" -> { emit(Opcodes.EQ_NUM); emitReg(rd); @@ -2095,33 +2364,50 @@ public void visit(BinaryOperatorNode node) { emitReg(keyReg); } case "push" -> { - // Array push: push(@array, values...) - // left: OperatorNode("@", IdentifierNode("array")) + // Array push: push(@array, values...) or push(@$ref, values...) + // left: OperatorNode("@", IdentifierNode("array")) or OperatorNode("@", OperatorNode("$", ...)) // right: ListNode with values to push if (!(node.left instanceof OperatorNode)) { throwCompilerException("push requires array variable"); } OperatorNode leftOp = (OperatorNode) node.left; - if (!leftOp.operator.equals("@") || !(leftOp.operand instanceof IdentifierNode)) { + if (!leftOp.operator.equals("@")) { throwCompilerException("push requires array variable: push @array, values"); } - String varName = "@" + ((IdentifierNode) leftOp.operand).name; + int arrayReg = -1; // Will be assigned in if/else blocks - // Get the array - check lexical first, then global - int arrayReg; - if (hasVariable(varName)) { - // Lexical array - arrayReg = getVariableRegister(varName); - } else { - // Global array - load it + if (leftOp.operand instanceof IdentifierNode) { + // push @array + String varName = "@" + ((IdentifierNode) leftOp.operand).name; + + // Get the array - check lexical first, then global + if (hasVariable(varName)) { + // Lexical array + arrayReg = getVariableRegister(varName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = getCurrentPackage() + "::" + ((IdentifierNode) leftOp.operand).name; + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + } else if (leftOp.operand instanceof OperatorNode) { + // push @$ref - dereference first + leftOp.operand.accept(this); + int refReg = lastResultReg; + + // Dereference to get the array arrayReg = allocateRegister(); - String globalArrayName = getCurrentPackage() + "::" + ((IdentifierNode) leftOp.operand).name; - int nameIdx = addToStringPool(globalArrayName); - emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); emitReg(arrayReg); - emit(nameIdx); + emitReg(refReg); + } else { + throwCompilerException("push requires array variable or dereferenced array: push @array or push @$ref"); } // Evaluate the values to push (right operand) @@ -2138,33 +2424,53 @@ public void visit(BinaryOperatorNode node) { lastResultReg = arrayReg; } case "unshift" -> { - // Array unshift: unshift(@array, values...) - // left: OperatorNode("@", IdentifierNode("array")) + // Array unshift: unshift(@array, values...) or unshift(@$ref, values...) + // left: OperatorNode("@", IdentifierNode("array")) or OperatorNode("@", OperatorNode("$", ...)) // right: ListNode with values to unshift if (!(node.left instanceof OperatorNode)) { throwCompilerException("unshift requires array variable"); } OperatorNode leftOp = (OperatorNode) node.left; - if (!leftOp.operator.equals("@") || !(leftOp.operand instanceof IdentifierNode)) { + if (!leftOp.operator.equals("@")) { throwCompilerException("unshift requires array variable: unshift @array, values"); } - String varName = "@" + ((IdentifierNode) leftOp.operand).name; + int arrayReg = -1; // Will be assigned in if/else blocks - // Get the array - check lexical first, then global - int arrayReg; - if (hasVariable(varName)) { - // Lexical array - arrayReg = getVariableRegister(varName); - } else { - // Global array - load it + if (leftOp.operand instanceof IdentifierNode) { + // unshift @array + String varName = "@" + ((IdentifierNode) leftOp.operand).name; + + // Get the array - check lexical first, then global + if (hasVariable(varName)) { + // Lexical array + arrayReg = getVariableRegister(varName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = NameNormalizer.normalizeVariableName( + ((IdentifierNode) leftOp.operand).name, + getCurrentPackage() + ); + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + } else if (leftOp.operand instanceof OperatorNode) { + // unshift @$ref - dereference first + leftOp.operand.accept(this); + int refReg = lastResultReg; + + // Dereference to get the array arrayReg = allocateRegister(); - String globalArrayName = getCurrentPackage() + "::" + ((IdentifierNode) leftOp.operand).name; - int nameIdx = addToStringPool(globalArrayName); - emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); emitReg(arrayReg); - emit(nameIdx); + emitReg(refReg); + } else { + throwCompilerException("unshift requires array variable or dereferenced array: unshift @array or unshift @$ref"); } // Evaluate the values to unshift (right operand) @@ -2303,6 +2609,96 @@ public void visit(OperatorNode node) { lastResultReg = reg; return; } + } else if (node.operand instanceof ListNode) { + // my ($x, $y, @rest) - list of variable declarations + ListNode listNode = (ListNode) node.operand; + List varRegs = new ArrayList<>(); + + for (Node element : listNode.elements) { + if (element instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) element; + String sigil = sigilOp.operator; + + if (sigilOp.operand instanceof IdentifierNode) { + String varName = sigil + ((IdentifierNode) sigilOp.operand).name; + + // Check if this variable is captured by closures + if (sigilOp.id != 0) { + // Variable is captured - use persistent storage + int reg = allocateRegister(); + int nameIdx = addToStringPool(varName); + + switch (sigil) { + case "$" -> { + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_SCALAR); + emitReg(reg); + emit(nameIdx); + emit(sigilOp.id); + variableScopes.peek().put(varName, reg); + } + case "@" -> { + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_ARRAY); + emitReg(reg); + emit(nameIdx); + emit(sigilOp.id); + variableScopes.peek().put(varName, reg); + } + case "%" -> { + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_HASH); + emitReg(reg); + emit(nameIdx); + emit(sigilOp.id); + variableScopes.peek().put(varName, reg); + } + default -> throwCompilerException("Unsupported variable type in list declaration: " + sigil); + } + + varRegs.add(reg); + } else { + // Regular lexical variable + int reg = addVariable(varName, "my"); + + // Initialize the variable + switch (sigil) { + case "$" -> { + emit(Opcodes.LOAD_UNDEF); + emitReg(reg); + } + case "@" -> { + emit(Opcodes.NEW_ARRAY); + emitReg(reg); + } + case "%" -> { + emit(Opcodes.NEW_HASH); + emitReg(reg); + } + default -> throwCompilerException("Unsupported variable type in list declaration: " + sigil); + } + + varRegs.add(reg); + } + } else { + throwCompilerException("my list declaration requires identifier: " + sigilOp.operand.getClass().getSimpleName()); + } + } else { + throwCompilerException("my list declaration requires scalar/array/hash: " + element.getClass().getSimpleName()); + } + } + + // Return a list of the declared variables + int resultReg = allocateRegister(); + emit(Opcodes.CREATE_LIST); + emitReg(resultReg); + emit(varRegs.size()); + for (int varReg : varRegs) { + emitReg(varReg); + } + + lastResultReg = resultReg; + return; } throw new RuntimeException("Unsupported my operand: " + node.operand.getClass().getSimpleName()); } else if (op.equals("our")) { @@ -2353,6 +2749,74 @@ public void visit(OperatorNode node) { lastResultReg = reg; return; } + } else if (node.operand instanceof ListNode) { + // our ($x, $y) - list of package variable declarations + ListNode listNode = (ListNode) node.operand; + List varRegs = new ArrayList<>(); + + String packageName = getCurrentPackage(); + + for (Node element : listNode.elements) { + if (element instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) element; + String sigil = sigilOp.operator; + + if (sigilOp.operand instanceof IdentifierNode) { + String varName = sigil + ((IdentifierNode) sigilOp.operand).name; + + int reg; + // Check if already declared in current scope + if (hasVariable(varName)) { + // Already declared, just use existing register + reg = getVariableRegister(varName); + } else { + // Allocate register and add to symbol table + reg = addVariable(varName, "our"); + + // Load from global variable + String globalVarName = packageName + "::" + ((IdentifierNode) sigilOp.operand).name; + int nameIdx = addToStringPool(globalVarName); + + switch (sigil) { + case "$" -> { + emit(Opcodes.LOAD_GLOBAL_SCALAR); + emitReg(reg); + emit(nameIdx); + } + case "@" -> { + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(reg); + emit(nameIdx); + } + case "%" -> { + emit(Opcodes.LOAD_GLOBAL_HASH); + emitReg(reg); + emit(nameIdx); + } + default -> throwCompilerException("Unsupported variable type in list declaration: " + sigil); + } + } + + varRegs.add(reg); + } else { + throwCompilerException("our list declaration requires identifier: " + sigilOp.operand.getClass().getSimpleName()); + } + } else { + throwCompilerException("our list declaration requires scalar/array/hash: " + element.getClass().getSimpleName()); + } + } + + // Return a list of the declared variables + int resultReg = allocateRegister(); + emit(Opcodes.CREATE_LIST); + emitReg(resultReg); + emit(varRegs.size()); + for (int varReg : varRegs) { + emitReg(varReg); + } + + lastResultReg = resultReg; + return; } throw new RuntimeException("Unsupported our operand: " + node.operand.getClass().getSimpleName()); } else if (op.equals("local")) { @@ -2483,6 +2947,21 @@ public void visit(OperatorNode node) { // Note: We don't check scalar context here because dereferencing // should return the array itself. The slice or other operation // will handle scalar context conversion if needed. + } else if (node.operand instanceof BlockNode) { + // @{ block } - evaluate block and dereference the result + // The block should return an arrayref + BlockNode blockNode = (BlockNode) node.operand; + blockNode.accept(this); + int refReg = lastResultReg; + + // Dereference to get the array + int rd = allocateRegister(); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); + emitReg(rd); + emitReg(refReg); + + lastResultReg = rd; } else { throwCompilerException("Unsupported @ operand: " + node.operand.getClass().getSimpleName()); } @@ -2707,6 +3186,54 @@ public void visit(OperatorNode node) { } else { throw new RuntimeException("NOT operator requires operand"); } + } else if (op.equals("defined")) { + // Defined operator: defined($x) + // Check if value is defined (not undef) + if (node.operand != null) { + node.operand.accept(this); + int rs = lastResultReg; + + // Allocate result register + int rd = allocateRegister(); + + // Emit DEFINED opcode + emit(Opcodes.DEFINED); + emitReg(rd); + emitReg(rs); + + lastResultReg = rd; + } else { + throw new RuntimeException("defined operator requires operand"); + } + } else if (op.equals("ref")) { + // Ref operator: ref($x) + // Get reference type (blessed class name or base type) + if (node.operand == null) { + throwCompilerException("ref requires an argument"); + } + + // Compile the operand + if (node.operand instanceof ListNode) { + ListNode list = (ListNode) node.operand; + if (list.elements.isEmpty()) { + throwCompilerException("ref requires an argument"); + } + // Get first element + list.elements.get(0).accept(this); + } else { + node.operand.accept(this); + } + int argReg = lastResultReg; + + // Allocate result register + int rd = allocateRegister(); + + // Emit REF opcode + emit(Opcodes.REF); + emitReg(rd); + emitReg(argReg); + + lastResultReg = rd; } else if (op.equals("++") || op.equals("--") || op.equals("++postfix") || op.equals("--postfix")) { // Pre/post increment/decrement boolean isPostfix = op.endsWith("postfix"); @@ -3026,8 +3553,8 @@ public void visit(OperatorNode node) { lastResultReg = rd; } else if (op.equals("pop")) { - // Array pop: $x = pop @array - // operand: ListNode containing OperatorNode("@", IdentifierNode) + // Array pop: $x = pop @array or $x = pop @$ref + // operand: ListNode containing OperatorNode("@", IdentifierNode or OperatorNode) if (node.operand == null || !(node.operand instanceof ListNode)) { throwCompilerException("pop requires array argument"); } @@ -3038,25 +3565,42 @@ public void visit(OperatorNode node) { } OperatorNode arrayOp = (OperatorNode) list.elements.get(0); - if (!arrayOp.operator.equals("@") || !(arrayOp.operand instanceof IdentifierNode)) { + if (!arrayOp.operator.equals("@")) { throwCompilerException("pop requires array variable: pop @array"); } - String varName = "@" + ((IdentifierNode) arrayOp.operand).name; + int arrayReg = -1; // Will be assigned in if/else blocks - // Get the array - check lexical first, then global - int arrayReg; - if (hasVariable(varName)) { - // Lexical array - arrayReg = getVariableRegister(varName); - } else { - // Global array - load it + if (arrayOp.operand instanceof IdentifierNode) { + // pop @array + String varName = "@" + ((IdentifierNode) arrayOp.operand).name; + + // Get the array - check lexical first, then global + if (hasVariable(varName)) { + // Lexical array + arrayReg = getVariableRegister(varName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = NameNormalizer.normalizeVariableName(((IdentifierNode) arrayOp.operand).name, getCurrentPackage()); + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + } else if (arrayOp.operand instanceof OperatorNode) { + // pop @$ref - dereference first + arrayOp.operand.accept(this); + int refReg = lastResultReg; + + // Dereference to get the array arrayReg = allocateRegister(); - String globalArrayName = NameNormalizer.normalizeVariableName(((IdentifierNode) arrayOp.operand).name, getCurrentPackage()); - int nameIdx = addToStringPool(globalArrayName); - emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); emitReg(arrayReg); - emit(nameIdx); + emitReg(refReg); + } else { + throwCompilerException("pop requires array variable or dereferenced array: pop @array or pop @$ref"); } // Allocate result register @@ -3069,8 +3613,8 @@ public void visit(OperatorNode node) { lastResultReg = rd; } else if (op.equals("shift")) { - // Array shift: $x = shift @array - // operand: ListNode containing OperatorNode("@", IdentifierNode) + // Array shift: $x = shift @array or $x = shift @$ref + // operand: ListNode containing OperatorNode("@", IdentifierNode or OperatorNode) if (node.operand == null || !(node.operand instanceof ListNode)) { throwCompilerException("shift requires array argument"); } @@ -3081,25 +3625,42 @@ public void visit(OperatorNode node) { } OperatorNode arrayOp = (OperatorNode) list.elements.get(0); - if (!arrayOp.operator.equals("@") || !(arrayOp.operand instanceof IdentifierNode)) { + if (!arrayOp.operator.equals("@")) { throwCompilerException("shift requires array variable: shift @array"); } - String varName = "@" + ((IdentifierNode) arrayOp.operand).name; + int arrayReg = -1; // Will be assigned in if/else blocks - // Get the array - check lexical first, then global - int arrayReg; - if (hasVariable(varName)) { - // Lexical array - arrayReg = getVariableRegister(varName); - } else { - // Global array - load it + if (arrayOp.operand instanceof IdentifierNode) { + // shift @array + String varName = "@" + ((IdentifierNode) arrayOp.operand).name; + + // Get the array - check lexical first, then global + if (hasVariable(varName)) { + // Lexical array + arrayReg = getVariableRegister(varName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = NameNormalizer.normalizeVariableName(((IdentifierNode) arrayOp.operand).name, getCurrentPackage()); + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + } else if (arrayOp.operand instanceof OperatorNode) { + // shift @$ref - dereference first + arrayOp.operand.accept(this); + int refReg = lastResultReg; + + // Dereference to get the array arrayReg = allocateRegister(); - String globalArrayName = NameNormalizer.normalizeVariableName(((IdentifierNode) arrayOp.operand).name, getCurrentPackage()); - int nameIdx = addToStringPool(globalArrayName); - emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); emitReg(arrayReg); - emit(nameIdx); + emitReg(refReg); + } else { + throwCompilerException("shift requires array variable or dereferenced array: shift @array or shift @$ref"); } // Allocate result register @@ -3125,28 +3686,45 @@ public void visit(OperatorNode node) { // First element is the array OperatorNode arrayOp = (OperatorNode) list.elements.get(0); - if (!arrayOp.operator.equals("@") || !(arrayOp.operand instanceof IdentifierNode)) { + if (!arrayOp.operator.equals("@")) { throwCompilerException("splice requires array variable: splice @array, ..."); } - String varName = "@" + ((IdentifierNode) arrayOp.operand).name; + int arrayReg = -1; // Will be assigned in if/else blocks - // Get the array - check lexical first, then global - int arrayReg; - if (hasVariable(varName)) { - // Lexical array - arrayReg = getVariableRegister(varName); - } else { - // Global array - load it + if (arrayOp.operand instanceof IdentifierNode) { + // splice @array + String varName = "@" + ((IdentifierNode) arrayOp.operand).name; + + // Get the array - check lexical first, then global + if (hasVariable(varName)) { + // Lexical array + arrayReg = getVariableRegister(varName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = NameNormalizer.normalizeVariableName( + ((IdentifierNode) arrayOp.operand).name, + getCurrentPackage() + ); + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + } else if (arrayOp.operand instanceof OperatorNode) { + // splice @$ref - dereference first + arrayOp.operand.accept(this); + int refReg = lastResultReg; + + // Dereference to get the array arrayReg = allocateRegister(); - String globalArrayName = NameNormalizer.normalizeVariableName( - ((IdentifierNode) arrayOp.operand).name, - getCurrentPackage() - ); - int nameIdx = addToStringPool(globalArrayName); - emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); emitReg(arrayReg); - emit(nameIdx); + emitReg(refReg); + } else { + throwCompilerException("splice requires array variable or dereferenced array: splice @array or splice @$ref"); } // Create a list with the remaining arguments (offset, length, replacement values) @@ -3559,6 +4137,117 @@ public void visit(OperatorNode node) { emitReg(rd); emitReg(hashReg); + lastResultReg = rd; + } else if (op.equals("$#")) { + // $#array - get last index of array (size - 1) + // operand: array variable (OperatorNode("@" ...) or IdentifierNode) + if (node.operand == null) { + throwCompilerException("$# requires an array argument"); + } + + int arrayReg = -1; + + // Handle different operand types + if (node.operand instanceof OperatorNode) { + OperatorNode operandOp = (OperatorNode) node.operand; + + if (operandOp.operator.equals("@") && operandOp.operand instanceof IdentifierNode) { + // $#@array or $#array (both work) + String varName = "@" + ((IdentifierNode) operandOp.operand).name; + + if (hasVariable(varName)) { + arrayReg = getVariableRegister(varName); + } else { + arrayReg = allocateRegister(); + String globalArrayName = NameNormalizer.normalizeVariableName( + ((IdentifierNode) operandOp.operand).name, + getCurrentPackage() + ); + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + } else if (operandOp.operator.equals("$")) { + // $#$ref - dereference first + operandOp.accept(this); + int refReg = lastResultReg; + + arrayReg = allocateRegister(); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); + emitReg(arrayReg); + emitReg(refReg); + } else { + throwCompilerException("$# requires array variable or dereferenced array"); + } + } else if (node.operand instanceof IdentifierNode) { + // $#array (without @) + String varName = "@" + ((IdentifierNode) node.operand).name; + + if (hasVariable(varName)) { + arrayReg = getVariableRegister(varName); + } else { + arrayReg = allocateRegister(); + String globalArrayName = NameNormalizer.normalizeVariableName( + ((IdentifierNode) node.operand).name, + getCurrentPackage() + ); + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitReg(arrayReg); + emit(nameIdx); + } + } else { + throwCompilerException("$# requires array variable"); + } + + // Get array size + int sizeReg = allocateRegister(); + emit(Opcodes.ARRAY_SIZE); + emitReg(sizeReg); + emitReg(arrayReg); + + // Subtract 1 to get last index + int oneReg = allocateRegister(); + emit(Opcodes.LOAD_INT); + emitReg(oneReg); + emitInt(1); + + int rd = allocateRegister(); + emit(Opcodes.SUB_SCALAR); + emitReg(rd); + emitReg(sizeReg); + emitReg(oneReg); + + lastResultReg = rd; + } else if (op.equals("length")) { + // length($string) - get string length + // operand: ListNode containing the string argument + if (node.operand == null) { + throwCompilerException("length requires an argument"); + } + + // Compile the operand + if (node.operand instanceof ListNode) { + ListNode list = (ListNode) node.operand; + if (list.elements.isEmpty()) { + throwCompilerException("length requires an argument"); + } + // Get first element + list.elements.get(0).accept(this); + } else { + node.operand.accept(this); + } + int stringReg = lastResultReg; + + // Call length builtin using SLOW_OP + int rd = allocateRegister(); + emit(Opcodes.SLOW_OP); + emit(Opcodes.SLOWOP_LENGTH); + emitReg(rd); + emitReg(stringReg); + lastResultReg = rd; } else { throwCompilerException("Unsupported operator: " + op); diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index 11ff18287..b9fc76f19 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -338,6 +338,18 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.POW_SCALAR: { + // Exponentiation: rd = rs1 ** rs2 + int rd = bytecode[pc++]; + int rs1 = bytecode[pc++]; + int rs2 = bytecode[pc++]; + registers[rd] = MathOperators.pow( + (RuntimeScalar) registers[rs1], + (RuntimeScalar) registers[rs2] + ); + break; + } + case Opcodes.NEG_SCALAR: { // Negation: rd = -rs int rd = bytecode[pc++]; @@ -478,6 +490,61 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.DEFINED: { + // Defined check: rd = defined(rs) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeBase val = registers[rs]; + boolean isDefined = val != null && val.getDefinedBoolean(); + registers[rd] = isDefined ? + RuntimeScalarCache.scalarTrue : RuntimeScalarCache.scalarFalse; + break; + } + + case Opcodes.REF: { + // Ref check: rd = ref(rs) - returns blessed class name or type + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeBase val = registers[rs]; + RuntimeScalar result; + if (val instanceof RuntimeScalar) { + result = org.perlonjava.operators.ReferenceOperators.ref((RuntimeScalar) val); + } else { + // For non-scalar types, convert to scalar first + result = org.perlonjava.operators.ReferenceOperators.ref(val.scalar()); + } + registers[rd] = result; + break; + } + + case Opcodes.BLESS: { + // Bless: rd = bless(rs_ref, rs_package) + int rd = bytecode[pc++]; + int refReg = bytecode[pc++]; + int packageReg = bytecode[pc++]; + RuntimeScalar ref = (RuntimeScalar) registers[refReg]; + RuntimeScalar packageName = (RuntimeScalar) registers[packageReg]; + registers[rd] = org.perlonjava.operators.ReferenceOperators.bless(ref, packageName); + break; + } + + case Opcodes.ISA: { + // ISA: rd = isa(rs_obj, rs_package) + int rd = bytecode[pc++]; + int objReg = bytecode[pc++]; + int packageReg = bytecode[pc++]; + RuntimeScalar obj = (RuntimeScalar) registers[objReg]; + RuntimeScalar packageName = (RuntimeScalar) registers[packageReg]; + // Create RuntimeArray with arguments + RuntimeArray isaArgs = new RuntimeArray(); + isaArgs.push(obj); + isaArgs.push(packageName); + // Call Universal.isa + RuntimeList result = org.perlonjava.perlmodule.Universal.isa(isaArgs, RuntimeContextType.SCALAR); + registers[rd] = result.scalar(); + break; + } + // ================================================================= // ARRAY OPERATIONS // ================================================================= @@ -488,17 +555,24 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int arrayReg = bytecode[pc++]; int indexReg = bytecode[pc++]; - // Check type - if (!(registers[arrayReg] instanceof RuntimeArray)) { + RuntimeBase arrayBase = registers[arrayReg]; + RuntimeScalar idx = (RuntimeScalar) registers[indexReg]; + + if (arrayBase instanceof RuntimeArray) { + RuntimeArray arr = (RuntimeArray) arrayBase; + registers[rd] = arr.get(idx.getInt()); + } else if (arrayBase instanceof RuntimeList) { + RuntimeList list = (RuntimeList) arrayBase; + int index = idx.getInt(); + if (index < 0) index = list.elements.size() + index; + registers[rd] = (index >= 0 && index < list.elements.size()) + ? list.elements.get(index) + : new RuntimeScalar(); + } else { throw new RuntimeException("ARRAY_GET: register " + arrayReg + " contains " + - (registers[arrayReg] == null ? "null" : registers[arrayReg].getClass().getName()) + - " instead of RuntimeArray"); + (arrayBase == null ? "null" : arrayBase.getClass().getName()) + + " instead of RuntimeArray or RuntimeList"); } - - RuntimeArray arr = (RuntimeArray) registers[arrayReg]; - RuntimeScalar idx = (RuntimeScalar) registers[indexReg]; - // Uses RuntimeArray API directly - registers[rd] = arr.get(idx.getInt()); break; } @@ -555,10 +629,16 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c case Opcodes.ARRAY_SIZE: { // Array size: rd = scalar(@array) or scalar(value) // Use polymorphic scalar() method - arrays return size, scalars return themselves + // Special case for RuntimeList: return size, not last element int rd = bytecode[pc++]; int operandReg = bytecode[pc++]; RuntimeBase operand = registers[operandReg]; - registers[rd] = operand.scalar(); + if (operand instanceof RuntimeList) { + // For RuntimeList in list assignment context, return the count + registers[rd] = new RuntimeScalar(((RuntimeList) operand).size()); + } else { + registers[rd] = operand.scalar(); + } break; } @@ -983,6 +1063,46 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c // LIST OPERATIONS // ================================================================= + case Opcodes.LIST_TO_SCALAR: { + // Convert list to scalar context (returns size) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeBase val = registers[rs]; + if (val instanceof RuntimeList) { + registers[rd] = new RuntimeScalar(((RuntimeList) val).elements.size()); + } else if (val instanceof RuntimeArray) { + registers[rd] = new RuntimeScalar(((RuntimeArray) val).size()); + } else { + // Already a scalar + registers[rd] = val.scalar(); + } + break; + } + + case Opcodes.SCALAR_TO_LIST: { + // Convert scalar to RuntimeList + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeBase val = registers[rs]; + if (val instanceof RuntimeList) { + // Already a list + registers[rd] = val; + } else if (val instanceof RuntimeArray) { + // Convert array to list + RuntimeList list = new RuntimeList(); + for (RuntimeScalar elem : (RuntimeArray) val) { + list.elements.add(elem); + } + registers[rd] = list; + } else { + // Scalar to list - wrap in a list + RuntimeList list = new RuntimeList(); + list.elements.add(val.scalar()); + registers[rd] = list; + } + break; + } + case Opcodes.CREATE_LIST: { // Create RuntimeList from registers // Format: [CREATE_LIST] [rd] [count] [rs1] [rs2] ... [rsN] @@ -1226,12 +1346,9 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c throw (PerlDieException) e; } - // Wrap other exceptions with interpreter context - throw new RuntimeException( - "Interpreter error in " + code.sourceName + ":" + code.sourceLine + - " at pc=" + pc + ": " + e.getMessage(), - e - ); + // Wrap other exceptions with interpreter context including bytecode context + String errorMessage = formatInterpreterError(code, pc, e); + throw new RuntimeException(errorMessage, e); } finally { // Always pop the interpreter state InterpreterState.pop(); @@ -1247,4 +1364,31 @@ private static int readInt(short[] bytecode, int pc) { int low = bytecode[pc + 1] & 0xFFFF; // Keep mask here - need full 32-bit range return (high << 16) | low; } + + /** + * Format an interpreter error with source location. + * Shows the error location using the pc-to-tokenIndex mapping if available. + */ + private static String formatInterpreterError(InterpretedCode code, int errorPc, Throwable e) { + StringBuilder sb = new StringBuilder(); + + // Try to get line number from pcToTokenIndex map + Integer tokenIndex = (code.pcToTokenIndex != null) ? code.pcToTokenIndex.get(errorPc) : null; + + if (tokenIndex != null) { + // We have token index information + sb.append("Interpreter error in ").append(code.sourceName) + .append(" at token ").append(tokenIndex) + .append(" (pc=").append(errorPc).append("): ") + .append(e.getMessage()); + } else { + // No token index available, use source line from code + sb.append("Interpreter error in ").append(code.sourceName) + .append(":").append(code.sourceLine) + .append(" at pc=").append(errorPc) + .append(": ").append(e.getMessage()); + } + + return sb.toString(); + } } diff --git a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java index c1d7de51e..80daa6fcb 100644 --- a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java +++ b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java @@ -303,6 +303,12 @@ public String disassemble() { rs2 = bytecode[pc++]; sb.append("MOD_SCALAR r").append(rd).append(" = r").append(rs1).append(" % r").append(rs2).append("\n"); break; + case Opcodes.POW_SCALAR: + rd = bytecode[pc++]; + rs1 = bytecode[pc++]; + rs2 = bytecode[pc++]; + sb.append("POW_SCALAR r").append(rd).append(" = r").append(rs1).append(" ** r").append(rs2).append("\n"); + break; case Opcodes.NEG_SCALAR: rd = bytecode[pc++]; int rsNeg = bytecode[pc++]; @@ -582,6 +588,40 @@ public String disassemble() { rs = bytecode[pc++]; sb.append("NOT r").append(rd).append(" = !r").append(rs).append("\n"); break; + case Opcodes.DEFINED: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("DEFINED r").append(rd).append(" = defined(r").append(rs).append(")\n"); + break; + case Opcodes.REF: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("REF r").append(rd).append(" = ref(r").append(rs).append(")\n"); + break; + case Opcodes.BLESS: + rd = bytecode[pc++]; + int refReg = bytecode[pc++]; + int packageReg = bytecode[pc++]; + sb.append("BLESS r").append(rd).append(" = bless(r").append(refReg) + .append(", r").append(packageReg).append(")\n"); + break; + case Opcodes.ISA: + rd = bytecode[pc++]; + int objReg = bytecode[pc++]; + int pkgReg = bytecode[pc++]; + sb.append("ISA r").append(rd).append(" = isa(r").append(objReg) + .append(", r").append(pkgReg).append(")\n"); + break; + case Opcodes.LIST_TO_SCALAR: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("LIST_TO_SCALAR r").append(rd).append(" = last_element(r").append(rs).append(")\n"); + break; + case Opcodes.SCALAR_TO_LIST: + rd = bytecode[pc++]; + rs = bytecode[pc++]; + sb.append("SCALAR_TO_LIST r").append(rd).append(" = to_list(r").append(rs).append(")\n"); + break; case Opcodes.SLOW_OP: { int slowOpId = bytecode[pc++]; String opName = SlowOpcodeHandler.getSlowOpName(slowOpId); @@ -642,6 +682,15 @@ public String disassemble() { sb.append(" r").append(rd).append(" = r").append(sliceArrayReg) .append("[r").append(sliceIndicesReg).append("]"); break; + case Opcodes.SLOWOP_LIST_SLICE_FROM: + // Format: [rd] [listReg] [startIndex as 2 shorts] + rd = bytecode[pc++]; + int sliceFromListReg = bytecode[pc++]; + int startIndex = readInt(bytecode, pc); + pc += 2; // Skip the 2 shorts we just read + sb.append(" r").append(rd).append(" = r").append(sliceFromListReg) + .append("[").append(startIndex).append("..]"); + break; case Opcodes.SLOWOP_REVERSE: // Format: [rd] [argsReg] [ctx] rd = bytecode[pc++]; diff --git a/src/main/java/org/perlonjava/interpreter/Opcodes.java b/src/main/java/org/perlonjava/interpreter/Opcodes.java index 07436f4ac..3a3b2a58c 100644 --- a/src/main/java/org/perlonjava/interpreter/Opcodes.java +++ b/src/main/java/org/perlonjava/interpreter/Opcodes.java @@ -443,6 +443,18 @@ public class Opcodes { /** Sort operator: rd = ListOperators.sort(list_reg, closure_reg, package_name) */ public static final byte SORT = 101; + /** Defined operator: rd = defined(rs) - check if value is defined */ + public static final byte DEFINED = 102; + + /** Ref operator: rd = ref(rs) - get reference type as string */ + public static final byte REF = 103; + + /** Bless operator: rd = bless(rs_ref, rs_package) - bless a reference into a package */ + public static final byte BLESS = 104; + + /** ISA operator: rd = isa(rs_obj, rs_package) - check if object is instance of package */ + public static final byte ISA = 105; + // ================================================================= // Slow Operation IDs (0-255) // ================================================================= @@ -566,6 +578,12 @@ public class Opcodes { /** Slow op ID: hash.setSlice(keys_list, values_list) - hash slice assignment @hash{keys} = values */ public static final int SLOWOP_HASH_SLICE_SET = 38; + /** Slow op ID: rd = list[start..] - extract list slice from start index to end */ + public static final int SLOWOP_LIST_SLICE_FROM = 39; + + /** Slow op ID: rd = length(string) - get string length */ + public static final int SLOWOP_LENGTH = 40; + // ================================================================= // OPCODES 93-255: RESERVED FOR FUTURE FAST OPERATIONS // ================================================================= diff --git a/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java b/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java index a4d81b63a..95fce5f35 100644 --- a/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java +++ b/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java @@ -196,6 +196,12 @@ public static int execute( case Opcodes.SLOWOP_HASH_SLICE_SET: return executeHashSliceSet(bytecode, pc, registers); + case Opcodes.SLOWOP_LIST_SLICE_FROM: + return executeListSliceFrom(bytecode, pc, registers); + + case Opcodes.SLOWOP_LENGTH: + return executeLength(bytecode, pc, registers); + default: throw new RuntimeException( "Unknown slow operation ID: " + slowOpId + @@ -248,6 +254,9 @@ public static String getSlowOpName(int slowOpId) { case Opcodes.SLOWOP_DEREF_HASH -> "deref_hash"; case Opcodes.SLOWOP_HASH_SLICE -> "hash_slice"; case Opcodes.SLOWOP_HASH_SLICE_DELETE -> "hash_slice_delete"; + case Opcodes.SLOWOP_HASH_SLICE_SET -> "hash_slice_set"; + case Opcodes.SLOWOP_LIST_SLICE_FROM -> "list_slice_from"; + case Opcodes.SLOWOP_LENGTH -> "length"; default -> "slowop_" + slowOpId; }; } @@ -1076,6 +1085,89 @@ private static int executeHashSliceSet( return pc; } + /** + * SLOWOP_LIST_SLICE_FROM: rd = list[start..] + * Extract a slice from a list starting at given index to the end + * Format: [SLOWOP_LIST_SLICE_FROM] [rd] [listReg] [startIndex] + */ + private static int executeListSliceFrom( + short[] bytecode, + int pc, + RuntimeBase[] registers) { + + int rd = bytecode[pc++]; + int listReg = bytecode[pc++]; + // Read startIndex as 2 shorts (int = high 16 bits + low 16 bits) + int high = bytecode[pc++] & 0xFFFF; + int low = bytecode[pc++] & 0xFFFF; + int startIndex = (high << 16) | low; + + RuntimeBase listBase = registers[listReg]; + RuntimeList sourceList; + + // Convert to RuntimeList if needed + if (listBase instanceof RuntimeList) { + sourceList = (RuntimeList) listBase; + } else if (listBase instanceof RuntimeArray) { + // Convert RuntimeArray to RuntimeList + sourceList = new RuntimeList(); + for (RuntimeScalar elem : (RuntimeArray) listBase) { + sourceList.elements.add(elem); + } + } else { + // Single value - wrap in list + sourceList = new RuntimeList(); + sourceList.elements.add(listBase.scalar()); + } + + // Extract slice from startIndex to end + RuntimeList result = new RuntimeList(); + int size = sourceList.elements.size(); + + // Handle negative indices + if (startIndex < 0) { + startIndex = size + startIndex; + } + + // Clamp to valid range + if (startIndex < 0) { + startIndex = 0; + } + if (startIndex > size) { + startIndex = size; + } + + // Copy elements from startIndex to end + for (int i = startIndex; i < size; i++) { + result.elements.add(sourceList.elements.get(i)); + } + + registers[rd] = result; + return pc; + } + + /** + * SLOWOP_LENGTH: rd = length(string) + * Get the length of a string + * Format: [SLOWOP_LENGTH] [rd] [stringReg] + */ + private static int executeLength( + short[] bytecode, + int pc, + RuntimeBase[] registers) { + + int rd = bytecode[pc++]; + int stringReg = bytecode[pc++]; + + RuntimeBase stringBase = registers[stringReg]; + RuntimeScalar stringScalar = stringBase.scalar(); + + int length = stringScalar.toString().length(); + registers[rd] = new RuntimeScalar(length); + + return pc; + } + private SlowOpcodeHandler() { // Utility class - no instantiation } From 2a4d61f7c15776ed9b1bbc8b14af03047e2e5445 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 13 Feb 2026 23:35:06 +0100 Subject: [PATCH 2/4] Fix interpreter error messages to show accurate line numbers using TreeMap Track line numbers like Perl codegen does: - Add line number tracking at each statement in BlockNode visitor - Use TreeMap for pcToTokenIndex (instead of HashMap) to enable floorEntry lookup - Use floorEntry to find nearest token index when exact PC match not found - Pass ErrorMessageUtil through InterpretedCode for line number conversion Error messages now show accurate source line numbers: Before: "Interpreter error in demo.t:1 at pc=724" and "demo.t:1446" After: "Interpreter error in demo.t line 112 (pc=724)" All line numbers are now within the actual file size and point to the correct source lines. Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeCompiler.java | 12 ++++++-- .../interpreter/BytecodeInterpreter.java | 28 ++++++++++++----- .../interpreter/InterpretedCode.java | 30 +++++++++++++++---- 3 files changed, 56 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java index 40db99d35..150d48606 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java @@ -36,7 +36,7 @@ public class BytecodeCompiler implements Visitor { private String currentPackage = "main"; // Token index tracking for error reporting - private final Map pcToTokenIndex = new HashMap<>(); + private final TreeMap pcToTokenIndex = new TreeMap<>(); private int currentTokenIndex = -1; // Track current token for error reporting // Error reporting @@ -279,7 +279,8 @@ public InterpretedCode compile(Node node, EmitterContext ctx) { sourceName, sourceLine, pcToTokenIndex, // Pass token index map for error reporting - variableRegistry // Variable registry for eval STRING + variableRegistry, // Variable registry for eval STRING + errorUtil // Pass error util for line number lookup ); } @@ -400,6 +401,13 @@ private RuntimeBase getVariableValueFromContext(String varName, EmitterContext c public void visit(BlockNode node) { // Visit each statement in the block for (Node stmt : node.elements) { + // Track line number for this statement (like codegen's setDebugInfoLineNumber) + if (stmt != null) { + int tokenIndex = stmt.getIndex(); + int pc = bytecode.size(); + pcToTokenIndex.put(pc, tokenIndex); + } + // Standalone statements (not assignments) use VOID context int savedContext = currentCallContext; diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index b9fc76f19..3b82fed37 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -1372,11 +1372,25 @@ private static int readInt(short[] bytecode, int pc) { private static String formatInterpreterError(InterpretedCode code, int errorPc, Throwable e) { StringBuilder sb = new StringBuilder(); - // Try to get line number from pcToTokenIndex map - Integer tokenIndex = (code.pcToTokenIndex != null) ? code.pcToTokenIndex.get(errorPc) : null; + // Try to get token index from pcToTokenIndex map + // Use floorEntry to find the nearest token index before or at errorPc + Integer tokenIndex = null; + if (code.pcToTokenIndex != null && !code.pcToTokenIndex.isEmpty()) { + var entry = code.pcToTokenIndex.floorEntry(errorPc); + if (entry != null) { + tokenIndex = entry.getValue(); + } + } - if (tokenIndex != null) { - // We have token index information + if (tokenIndex != null && code.errorUtil != null) { + // We have token index and errorUtil - convert to line number + int lineNumber = code.errorUtil.getLineNumber(tokenIndex); + sb.append("Interpreter error in ").append(code.sourceName) + .append(" line ").append(lineNumber) + .append(" (pc=").append(errorPc).append("): ") + .append(e.getMessage()); + } else if (tokenIndex != null) { + // We have token index but no errorUtil sb.append("Interpreter error in ").append(code.sourceName) .append(" at token ").append(tokenIndex) .append(" (pc=").append(errorPc).append("): ") @@ -1384,9 +1398,9 @@ private static String formatInterpreterError(InterpretedCode code, int errorPc, } else { // No token index available, use source line from code sb.append("Interpreter error in ").append(code.sourceName) - .append(":").append(code.sourceLine) - .append(" at pc=").append(errorPc) - .append(": ").append(e.getMessage()); + .append(" line ").append(code.sourceLine) + .append(" (pc=").append(errorPc).append("): ") + .append(e.getMessage()); } return sb.toString(); diff --git a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java index 80daa6fcb..dd941c89f 100644 --- a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java +++ b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java @@ -2,6 +2,7 @@ import org.perlonjava.runtime.*; import java.util.Map; +import java.util.TreeMap; /** * Interpreted bytecode that extends RuntimeCode. @@ -29,7 +30,8 @@ public class InterpretedCode extends RuntimeCode { // Debug information (optional) public final String sourceName; // Source file name (for stack traces) public final int sourceLine; // Source line number - public final java.util.Map pcToTokenIndex; // Map bytecode PC to tokenIndex for error reporting + public final TreeMap pcToTokenIndex; // Map bytecode PC to tokenIndex for error reporting (TreeMap for floorEntry lookup) + public final ErrorMessageUtil errorUtil; // For converting token index to line numbers /** * Constructor for InterpretedCode. @@ -43,12 +45,14 @@ public class InterpretedCode extends RuntimeCode { * @param sourceLine Source line number for debugging * @param pcToTokenIndex Map from bytecode PC to AST tokenIndex for error reporting * @param variableRegistry Variable name → register index mapping (for eval STRING) + * @param errorUtil Error message utility for line number lookup */ public InterpretedCode(short[] bytecode, Object[] constants, String[] stringPool, int maxRegisters, RuntimeBase[] capturedVars, String sourceName, int sourceLine, - java.util.Map pcToTokenIndex, - Map variableRegistry) { + TreeMap pcToTokenIndex, + Map variableRegistry, + ErrorMessageUtil errorUtil) { super(null, new java.util.ArrayList<>()); // Call RuntimeCode constructor with null prototype, empty attributes this.bytecode = bytecode; this.constants = constants; @@ -59,6 +63,7 @@ public InterpretedCode(short[] bytecode, Object[] constants, String[] stringPool this.sourceLine = sourceLine; this.pcToTokenIndex = pcToTokenIndex; this.variableRegistry = variableRegistry; + this.errorUtil = errorUtil; } // Legacy constructor for backward compatibility @@ -67,7 +72,21 @@ public InterpretedCode(short[] bytecode, Object[] constants, String[] stringPool String sourceName, int sourceLine, java.util.Map pcToTokenIndex) { this(bytecode, constants, stringPool, maxRegisters, capturedVars, - sourceName, sourceLine, pcToTokenIndex, null); + sourceName, sourceLine, + pcToTokenIndex instanceof TreeMap ? (TreeMap)pcToTokenIndex : new TreeMap<>(pcToTokenIndex), + null, null); + } + + // Legacy constructor with variableRegistry but no errorUtil + public InterpretedCode(short[] bytecode, Object[] constants, String[] stringPool, + int maxRegisters, RuntimeBase[] capturedVars, + String sourceName, int sourceLine, + java.util.Map pcToTokenIndex, + Map variableRegistry) { + this(bytecode, constants, stringPool, maxRegisters, capturedVars, + sourceName, sourceLine, + pcToTokenIndex instanceof TreeMap ? (TreeMap)pcToTokenIndex : new TreeMap<>(pcToTokenIndex), + variableRegistry, null); } /** @@ -126,7 +145,8 @@ public InterpretedCode withCapturedVars(RuntimeBase[] capturedVars) { this.sourceName, this.sourceLine, this.pcToTokenIndex, // Preserve token index map - this.variableRegistry // Preserve variable registry + this.variableRegistry, // Preserve variable registry + this.errorUtil // Preserve error util ); } From a89fc267c1234fe079a20c2fcf549070ebea5c69 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 13 Feb 2026 23:42:04 +0100 Subject: [PATCH 3/4] Add robustness to interpreter opcodes and improve error diagnostics - Make JOIN opcode handle non-scalar separators by converting to scalar - Make PRINT and SAY opcodes handle RuntimeArray in addition to RuntimeList/RuntimeScalar - Add ClassCastException handler to show bytecode context around errors - Display bytecode hex dump (10 bytes before, 5 after) to help debug opcode issues Error messages now show: - Bytecode context: [ ... >>> AE <<< ... ] - Helps identify undefined opcodes or PC misalignment issues Progress: 3 subtests passing (26/26 tests), working on opcode 0xAE issue Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeInterpreter.java | 61 ++++++++++++++++++- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index 3b82fed37..9055586df 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -834,11 +834,22 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int filehandleReg = bytecode[pc++]; Object val = registers[contentReg]; - RuntimeScalar fh = (RuntimeScalar) registers[filehandleReg]; + + // Filehandle should be scalar - convert if needed + RuntimeBase fhBase = registers[filehandleReg]; + RuntimeScalar fh = (fhBase instanceof RuntimeScalar) + ? (RuntimeScalar) fhBase + : fhBase.scalar(); RuntimeList list; if (val instanceof RuntimeList) { list = (RuntimeList) val; + } else if (val instanceof RuntimeArray) { + // Convert RuntimeArray to RuntimeList + list = new RuntimeList(); + for (RuntimeScalar elem : (RuntimeArray) val) { + list.add(elem); + } } else if (val instanceof RuntimeScalar) { // Convert scalar to single-element list list = new RuntimeList(); @@ -859,11 +870,22 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int filehandleReg = bytecode[pc++]; Object val = registers[contentReg]; - RuntimeScalar fh = (RuntimeScalar) registers[filehandleReg]; + + // Filehandle should be scalar - convert if needed + RuntimeBase fhBase = registers[filehandleReg]; + RuntimeScalar fh = (fhBase instanceof RuntimeScalar) + ? (RuntimeScalar) fhBase + : fhBase.scalar(); RuntimeList list; if (val instanceof RuntimeList) { list = (RuntimeList) val; + } else if (val instanceof RuntimeArray) { + // Convert RuntimeArray to RuntimeList + list = new RuntimeList(); + for (RuntimeScalar elem : (RuntimeArray) val) { + list.add(elem); + } } else if (val instanceof RuntimeScalar) { // Convert scalar to single-element list list = new RuntimeList(); @@ -1145,7 +1167,12 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int separatorReg = bytecode[pc++]; int listReg = bytecode[pc++]; - RuntimeScalar separator = (RuntimeScalar) registers[separatorReg]; + // Separator should be scalar - convert if needed + RuntimeBase separatorBase = registers[separatorReg]; + RuntimeScalar separator = (separatorBase instanceof RuntimeScalar) + ? (RuntimeScalar) separatorBase + : separatorBase.scalar(); + RuntimeBase list = registers[listReg]; // Call StringOperators.joinForInterpolation (doesn't warn on undef) @@ -1326,6 +1353,34 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c // Fell through end of bytecode - return empty list return new RuntimeList(); + } catch (ClassCastException e) { + // Special handling for ClassCastException to show which opcode is failing + // Check if we're inside an eval block first + if (!evalCatchStack.isEmpty()) { + evalCatchStack.pop(); + WarnDie.catchEval(e); + return new RuntimeList(); + } + + // Not in eval - show detailed error with bytecode context + int errorPc = Math.max(0, pc - 1); // Go back one instruction + + // Show bytecode context (10 bytes before errorPc) + StringBuilder bcContext = new StringBuilder(); + bcContext.append("\nBytecode context: ["); + for (int i = Math.max(0, errorPc - 10); i < Math.min(bytecode.length, errorPc + 5); i++) { + if (i == errorPc) { + bcContext.append(" >>>"); + } + bcContext.append(String.format(" %02X", bytecode[i] & 0xFF)); + if (i == errorPc) { + bcContext.append("<<<"); + } + } + bcContext.append(" ]"); + + String errorMessage = "ClassCastException" + bcContext + ": " + e.getMessage(); + throw new RuntimeException(formatInterpreterError(code, errorPc, new Exception(errorMessage)), e); } catch (Throwable e) { // Check if we're inside an eval block if (!evalCatchStack.isEmpty()) { From 20d05f737afcc0bccdc45549c6e684395005f85f Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 13 Feb 2026 23:48:44 +0100 Subject: [PATCH 4/4] Fix comparison operators and add COMPARE_STR to handle non-scalar operands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add auto-conversion to scalar for all comparison operators (==, !=, <, >, <=>, cmp) - Implement missing COMPARE_STR (cmp) opcode in BytecodeInterpreter - Fix STORE_GLOBAL_SCALAR to convert non-scalar values before storing - All comparison operators now handle RuntimeArray/RuntimeList by converting to scalar This fixes the ClassCastException when comparing non-scalar values: - `keys %hash == 2` now works (keys returns array, converted to count for comparison) - String interpolation with arrays now works correctly Test Results: ✅ Subtest 1: Variable assignment (2/2) ✅ Subtest 2: List assignment in scalar context (13/13) ✅ Subtest 3: List assignment with lvalue array/hash (16/16) ✅ Subtest 4: Basic syntax tests ✅ Subtest 6: Map tests (2/2) ✅ Subtest 7: Grep tests (2/2) ⚠️ Subtest 8: Sort tests (4/5 - one sort without block issue) ✅ Subtest 9: Object tests (2/2) Total: ~50+ tests passing (major progress from 26!) Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeInterpreter.java | 85 ++++++++++++++----- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index 9055586df..ce3904ddd 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -187,7 +187,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int nameIdx = bytecode[pc++]; int srcReg = bytecode[pc++]; String name = code.stringPool[nameIdx]; - GlobalVariable.getGlobalVariable(name).set((RuntimeScalar) registers[srcReg]); + + // Convert to scalar if needed + RuntimeBase value = registers[srcReg]; + RuntimeScalar scalarValue = (value instanceof RuntimeScalar) + ? (RuntimeScalar) value + : value.scalar(); + + GlobalVariable.getGlobalVariable(name).set(scalarValue); break; } @@ -421,10 +428,30 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int rd = bytecode[pc++]; int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; - registers[rd] = CompareOperators.spaceship( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] - ); + + // Convert operands to scalar if needed + RuntimeBase val1 = registers[rs1]; + RuntimeBase val2 = registers[rs2]; + RuntimeScalar s1 = (val1 instanceof RuntimeScalar) ? (RuntimeScalar) val1 : val1.scalar(); + RuntimeScalar s2 = (val2 instanceof RuntimeScalar) ? (RuntimeScalar) val2 : val2.scalar(); + + registers[rd] = CompareOperators.spaceship(s1, s2); + break; + } + + case Opcodes.COMPARE_STR: { + // String comparison: rd = rs1 cmp rs2 + int rd = bytecode[pc++]; + int rs1 = bytecode[pc++]; + int rs2 = bytecode[pc++]; + + // Convert operands to scalar if needed + RuntimeBase val1 = registers[rs1]; + RuntimeBase val2 = registers[rs2]; + RuntimeScalar s1 = (val1 instanceof RuntimeScalar) ? (RuntimeScalar) val1 : val1.scalar(); + RuntimeScalar s2 = (val2 instanceof RuntimeScalar) ? (RuntimeScalar) val2 : val2.scalar(); + + registers[rd] = CompareOperators.cmp(s1, s2); break; } @@ -433,10 +460,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int rd = bytecode[pc++]; int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; - registers[rd] = CompareOperators.equalTo( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] - ); + + // Convert operands to scalar if needed + RuntimeBase val1 = registers[rs1]; + RuntimeBase val2 = registers[rs2]; + RuntimeScalar s1 = (val1 instanceof RuntimeScalar) ? (RuntimeScalar) val1 : val1.scalar(); + RuntimeScalar s2 = (val2 instanceof RuntimeScalar) ? (RuntimeScalar) val2 : val2.scalar(); + + registers[rd] = CompareOperators.equalTo(s1, s2); break; } @@ -445,10 +476,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int rd = bytecode[pc++]; int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; - registers[rd] = CompareOperators.lessThan( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] - ); + + // Convert operands to scalar if needed + RuntimeBase val1 = registers[rs1]; + RuntimeBase val2 = registers[rs2]; + RuntimeScalar s1 = (val1 instanceof RuntimeScalar) ? (RuntimeScalar) val1 : val1.scalar(); + RuntimeScalar s2 = (val2 instanceof RuntimeScalar) ? (RuntimeScalar) val2 : val2.scalar(); + + registers[rd] = CompareOperators.lessThan(s1, s2); break; } @@ -457,10 +492,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int rd = bytecode[pc++]; int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; - registers[rd] = CompareOperators.greaterThan( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] - ); + + // Convert operands to scalar if needed + RuntimeBase val1 = registers[rs1]; + RuntimeBase val2 = registers[rs2]; + RuntimeScalar s1 = (val1 instanceof RuntimeScalar) ? (RuntimeScalar) val1 : val1.scalar(); + RuntimeScalar s2 = (val2 instanceof RuntimeScalar) ? (RuntimeScalar) val2 : val2.scalar(); + + registers[rd] = CompareOperators.greaterThan(s1, s2); break; } @@ -469,10 +508,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int rd = bytecode[pc++]; int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; - registers[rd] = CompareOperators.notEqualTo( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] - ); + + // Convert operands to scalar if needed + RuntimeBase val1 = registers[rs1]; + RuntimeBase val2 = registers[rs2]; + RuntimeScalar s1 = (val1 instanceof RuntimeScalar) ? (RuntimeScalar) val1 : val1.scalar(); + RuntimeScalar s2 = (val2 instanceof RuntimeScalar) ? (RuntimeScalar) val2 : val2.scalar(); + + registers[rd] = CompareOperators.notEqualTo(s1, s2); break; }