Optimize codegen: inline string methods, C-style for loops, direct array access

- Remove redundant echo in @awk functions (leaked to stdout)
- Replace range() subshell fork with C-style for ((i=0; i<N; i++))
- Inline string methods (upper/lower/len/trim/replace/contains/starts/ends/substr/index) for simple variable receivers
- Direct array subscript for .get() and .set() instead of function calls

Optimize codegen: inline string methods, C-style for loops, direct array access
3d28f1b4 · Roman Alifanov · e9f4108d · 3d28f1b4 · 3d28f1b4 · 3d28f1b4
Commit 3d28f1b4 authored Feb 25, 2026 by Roman Alifanov
Hide whitespace changes
Inline Side-by-side

Showing with 136 additions and 10 deletions

awk.py compiler/backend/bash/awk.py +0 -1

expr.py compiler/backend/bash/expr.py +92 -0

stmt.py compiler/backend/bash/stmt.py +44 -9

No files found.
--- a/compiler/backend/bash/awk.py
+++ b/compiler/backend/bash/awk.py
@@ -75,7 +75,6 @@ def _emit_awk_body(fn: IRFunction, validate_dec, ctx: EmitContext) -> None:
    ctx.emit("}')")
    ctx.emit('local __awk_rc=$?')
-    ctx.emit(f'echo "${{{RET_VAR}}}"')
    ctx.emit('return $__awk_rc')

--- a/compiler/backend/bash/expr.py
+++ b/compiler/backend/bash/expr.py
@@ -723,6 +723,80 @@ _NS_METHOD_PREFIX: dict[str, str] = {
 }
+# String methods that `_try_inline_string_method` lowers to inline Bash
+# (parameter expansion or a `[[ ]]` test) instead of a helper-function call.
+_INLINE_STRING_METHODS = frozenset({
+    'upper', 'lower', 'len', 'trim', 'replace', 'contains', 'starts', 'ends',
+    'substr', 'index',
+})
+def _try_inline_string_method(recv_name: str, method: str, args: list, ctx: 'EmitContext') -> str | None:
+    """Emit inline Bash for a string method called on a plain variable.
+
+    Args:
+        recv_name: Bash variable name of the receiver; it is interpolated
+            directly into ``${recv_name...}`` expansions.
+        method: Method name; only names in ``_INLINE_STRING_METHODS`` are
+            handled.
+        args: Argument nodes of the call; each one used is lowered with
+            ``_expr``.
+        ctx: Emit context that receives the generated statements and hands
+            out temporary variable names.
+
+    Returns:
+        A quoted reference to ``RET_VAR`` (which the emitted code assigns),
+        or ``None`` when ``method`` is not inlinable so the caller can fall
+        back to the regular function-call path.
+    """
+    if method not in _INLINE_STRING_METHODS:
+        return None
+    if method == 'upper':
+        # ${var^^}: Bash case-modification expansion (uppercase all).
+        ctx.emit(f'{RET_VAR}="${{{recv_name}^^}}"')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'lower':
+        # ${var,,}: Bash case-modification expansion (lowercase all).
+        ctx.emit(f'{RET_VAR}="${{{recv_name},,}}"')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'len':
+        ctx.emit(f'{RET_VAR}=${{#{recv_name}}}')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'trim':
+        # Work on a copy, then strip the leading and the trailing whitespace
+        # run by removing "everything from/up to the first/last
+        # non-space character" computed via a nested expansion.
+        tmp = ctx.fresh_tmp()
+        ctx.emit(f'{tmp}="${{{recv_name}}}"')
+        ctx.emit(f'{tmp}="${{{tmp}#"${{{tmp}%%[![:space:]]*}}"}}"')
+        ctx.emit(f'{RET_VAR}="${{{tmp}%"${{{tmp}##*[![:space:]]}}"}}"')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'replace':
+        # Stash the pattern in a temp *before* lowering the replacement:
+        # a lowered method call is returned as a reference to RET_VAR, so
+        # lowering the second argument could otherwise overwrite the first.
+        old_arg = _expr(args[0], ctx)
+        tmp_p = ctx.fresh_tmp()
+        ctx.emit(f'{tmp_p}={old_arg}')
+        new_arg = _expr(args[1], ctx)
+        tmp_r = ctx.fresh_tmp()
+        ctx.emit(f'{tmp_r}={new_arg}')
+        # Both operands are expanded from quoted temporaries; the replacement
+        # must be "${tmp_r}" (with the `$`), otherwise the literal text
+        # "{tmp_r-name}" would be substituted.
+        ctx.emit(f'{RET_VAR}="${{{recv_name}//"${{{tmp_p}}}"/"${{{tmp_r}}}"}}"')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'contains':
+        arg = _expr(args[0], ctx)
+        ctx.emit(f'[[ "${{{recv_name}}}" == *{arg}* ]] && {RET_VAR}=true || {RET_VAR}=false')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'starts':
+        arg = _expr(args[0], ctx)
+        ctx.emit(f'[[ "${{{recv_name}}}" == {arg}* ]] && {RET_VAR}=true || {RET_VAR}=false')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'ends':
+        arg = _expr(args[0], ctx)
+        ctx.emit(f'[[ "${{{recv_name}}}" == *{arg} ]] && {RET_VAR}=true || {RET_VAR}=false')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'substr':
+        # NOTE(review): if both arguments lower to a RET_VAR reference, the
+        # start value may be overwritten while the length is lowered —
+        # confirm whether `_to_arith` operands need stashing here as well.
+        start_arg = _expr(args[0], ctx)
+        len_arg = _expr(args[1], ctx)
+        start_bare = _to_arith(start_arg)
+        len_bare = _to_arith(len_arg)
+        ctx.emit(f'{RET_VAR}="${{{recv_name}:{start_bare}:{len_bare}}}"')
+        return f'"${{{RET_VAR}}}"'
+    if method == 'index':
+        arg = _expr(args[0], ctx)
+        tmp_p = ctx.fresh_tmp()
+        tmp = ctx.fresh_tmp()
+        ctx.emit(f'{tmp_p}={arg}')
+        # tmp = text before the first occurrence of the needle; if nothing
+        # was removed the needle is absent (-1), else the prefix length is
+        # the index.
+        ctx.emit(f'{tmp}="${{{recv_name}%%"${{{tmp_p}}}"*}}"')
+        ctx.emit(f'[[ "${{{tmp}}}" == "${{{recv_name}}}" ]] && {RET_VAR}=-1 || {RET_VAR}=${{#{tmp}}}')
+        return f'"${{{RET_VAR}}}"'
+    return None
 def _stdlib_method_expr(node: IRMethodCall, ctx: 'EmitContext') -> str:
    method = node.method_name
    recv   = _expr(node.receiver, ctx)
@@ -768,6 +842,12 @@ def _stdlib_method_expr(node: IRMethodCall, ctx: 'EmitContext') -> str:
    if method == 'len' and (is_known_array or is_actual_array) and not is_field_recv:
        return '"${#' + recv_name + '[@]}"'
+    # arr.get(idx) fast path — direct subscript access
+    if method == 'get' and (is_known_array or is_actual_array) and not is_field_recv and not is_param_array:
+        idx_expr = _expr(node.args[0], ctx)
+        idx_bare = idx_expr.strip('"').strip('${}')
+        return f'"${{{recv_name}[{idx_bare}]}}"'
    args = _ct_args(node.args, ctx)
    # Array path — field access uses recv directly (it expands to the array name)
@@ -799,6 +879,12 @@ def _stdlib_method_expr(node: IRMethodCall, ctx: 'EmitContext') -> str:
        ctx.emit(f'{fn} {call_args}')
        return f'"${{{RET_VAR}}}"'
+    # String inlining fast path: only for simple variable receivers
+    if isinstance(node.receiver, IRIdentifier) and not is_known_array and not is_known_dict:
+        inlined = _try_inline_string_method(recv_name, method, node.args, ctx)
+        if inlined is not None:
+            return inlined
    # String methods (and fallback)
    prefix = _STDLIB_METHOD_PREFIX.get(method, STR_PREFIX)
    fn = f'{prefix}{method}'
@@ -852,6 +938,12 @@ def _stdlib_method_stmt(node: IRMethodCall, ctx: 'EmitContext') -> None:
        if method == 'shift':
            ctx.emit(f'{recv_name}=("${{{recv_name}[@]:1}}")')
            return
+        if method == 'set':
+            idx_expr = _expr(node.args[0], ctx)
+            val_expr = _expr(node.args[1], ctx)
+            idx_bare = idx_expr.strip('"').strip('${}')
+            ctx.emit(f'{recv_name}[{idx_bare}]={val_expr}')
+            return
    if (is_known_dict or is_actual_dict) and not is_field_recv:
        if method == 'set':

--- a/compiler/backend/bash/stmt.py
+++ b/compiler/backend/bash/stmt.py
@@ -479,14 +479,46 @@ def _while(node: IRWhile, ctx: 'EmitContext') -> None:
    ctx.emit('done')
+def _emit_c_style_for(var: str, args: list, ctx: 'EmitContext') -> None:
+    """Emit the header line of a ``for`` loop over a range() call.
+
+    With one to three arguments a Bash arithmetic ``for ((...))`` header is
+    emitted (end / start,end / start,end,step). Any other argument count
+    falls back to iterating over the output of ``__ct_range``. Only the
+    ``for ...; do`` line (plus any temp assignments for the bounds) is
+    emitted here.
+    """
+    from .expr import _to_arith
+    from .constants import RET_VAR
+    def _stash_if_volatile(operand: str) -> str:
+        # An operand mentioning RET_VAR is copied into a fresh temp so that
+        # lowering a later operand cannot change it.
+        if RET_VAR not in operand:
+            return operand
+        holder = ctx.fresh_tmp()
+        ctx.emit(f'{holder}={operand}')
+        return f'${{{holder}}}'
+    argc = len(args)
+    if argc not in (1, 2, 3):
+        args_str = _ct_args(args, ctx)
+        ctx.emit(f'for {var} in $(__ct_range {args_str}); do')
+        return
+    # Lower the operands strictly left to right, stashing each one before
+    # the next is lowered.
+    bounds = [_stash_if_volatile(_to_arith(_expr(arg, ctx))) for arg in args]
+    if argc == 1:
+        (end,) = bounds
+        header = f'for (({var}=0; {var}<{end}; {var}++)); do'
+    elif argc == 2:
+        start, end = bounds
+        header = f'for (({var}={start}; {var}<{end}; {var}++)); do'
+    else:
+        start, end, step = bounds
+        header = f'for (({var}={start}; {var}<{end}; {var}+={step})); do'
+    ctx.emit(header)
 def _for(node: IRFor, ctx: 'EmitContext') -> None:
    """for i in range(...)"""
    from ...ir.nodes import IRCall
    from .expr import _BUILTIN_BASH
    if isinstance(node.iterable, IRCall):
-        bn = _BUILTIN_BASH.get(node.iterable.callee_name, node.iterable.callee_name)
+        cname = node.iterable.callee_name
-        args = _ct_args(node.iterable.args, ctx)
+        if cname in ('range', '__ct_range'):
-        ctx.emit(f'for {node.variable} in $({bn} {args}); do')
+            _emit_c_style_for(node.variable, node.iterable.args, ctx)
+        else:
+            bn = _BUILTIN_BASH.get(cname, cname)
+            args = _ct_args(node.iterable.args, ctx)
+            ctx.emit(f'for {node.variable} in $({bn} {args}); do')
    else:
        iterable = _expr(node.iterable, ctx) if node.iterable else '"$@"'
        ctx.emit(f'for {node.variable} in {iterable}; do')
@@ -535,15 +567,18 @@ def _foreach(node: IRForeach, ctx: 'EmitContext') -> None:
        ctx.emit('done')
        return
-    # range() call → iterate over stdout sequence
+    # range() call → C-style for loop (no subshell)
    from ...ir.nodes import IRCall as _IRCall
    if isinstance(iterable, _IRCall):
-        bash_name = iterable.callee_name
+        cname = iterable.callee_name
-        from .expr import _BUILTIN_BASH
-        bash_name = _BUILTIN_BASH.get(bash_name, bash_name)
-        args = _ct_args(iterable.args, ctx)
        var = vars_[0] if vars_ else '__ct_item'
-        ctx.emit(f'for {var} in $({bash_name} {args}); do')
+        if cname in ('range', '__ct_range'):
+            _emit_c_style_for(var, iterable.args, ctx)
+        else:
+            from .expr import _BUILTIN_BASH
+            bash_name = _BUILTIN_BASH.get(cname, cname)
+            args = _ct_args(iterable.args, ctx)
+            ctx.emit(f'for {var} in $({bash_name} {args}); do')
        with ctx.indented():
            if node.body:
                emit_block(node.body, ctx)