Commit 3d28f1b4 authored by Roman Alifanov

Optimize codegen: inline string methods, C-style for loops, direct array access

- Remove redundant echo in @awk functions (leaked to stdout)
- Replace range() subshell fork with C-style for ((i=0; i<N; i++))
- Inline string methods (upper/lower/len/trim/replace/contains/starts/ends/substr/index) for simple variable receivers
- Direct array subscript for .get() and .set() instead of function calls
parent e9f4108d
......@@ -75,7 +75,6 @@ def _emit_awk_body(fn: IRFunction, validate_dec, ctx: EmitContext) -> None:
ctx.emit("}')")
ctx.emit('local __awk_rc=$?')
ctx.emit(f'echo "${{{RET_VAR}}}"')
ctx.emit('return $__awk_rc')
......
......@@ -723,6 +723,80 @@ _NS_METHOD_PREFIX: dict[str, str] = {
}
_INLINE_STRING_METHODS = frozenset({
'upper', 'lower', 'len', 'trim', 'replace', 'contains', 'starts', 'ends',
'substr', 'index',
})
def _try_inline_string_method(recv_name: str, method: str, args: list, ctx: 'EmitContext') -> str | None:
    """Emit inline bash for a string method called on a plain variable.

    The generated statements store their result in ``RET_VAR`` using bash
    parameter expansion / ``[[ ]]`` tests instead of a helper-function call.

    Args:
        recv_name: Bash variable name of the receiver (used unquoted inside
            ``${...}`` expansions).
        method: Name of the string method being called.
        args: IR argument nodes of the call; each is lowered with ``_expr``.
        ctx: Emit context used to output statements and allocate temporaries.

    Returns:
        The bash expression that reads the result (``"${RET_VAR}"``), or
        ``None`` when the call is not inlined (unknown method, or fewer
        arguments than the inline form needs) so the caller can fall back to
        its regular call path.
    """
    if method not in _INLINE_STRING_METHODS:
        return None

    # Decline instead of raising IndexError when the call has fewer arguments
    # than the inline expansion consumes; the caller then uses its fallback.
    min_args = {
        'replace': 2, 'substr': 2,
        'contains': 1, 'starts': 1, 'ends': 1, 'index': 1,
    }
    if len(args) < min_args.get(method, 0):
        return None

    result = f'"${{{RET_VAR}}}"'

    if method == 'upper':
        ctx.emit(f'{RET_VAR}="${{{recv_name}^^}}"')
        return result
    if method == 'lower':
        ctx.emit(f'{RET_VAR}="${{{recv_name},,}}"')
        return result
    if method == 'len':
        ctx.emit(f'{RET_VAR}=${{#{recv_name}}}')
        return result
    if method == 'trim':
        tmp = ctx.fresh_tmp()
        ctx.emit(f'{tmp}="${{{recv_name}}}"')
        # Strip leading whitespace: remove the prefix that precedes the first
        # non-space character.
        ctx.emit(f'{tmp}="${{{tmp}#"${{{tmp}%%[![:space:]]*}}"}}"')
        # Strip trailing whitespace: remove the suffix that follows the last
        # non-space character.
        ctx.emit(f'{RET_VAR}="${{{tmp}%"${{{tmp}##*[![:space:]]}}"}}"')
        return result
    if method == 'replace':
        # Stash each operand right after it is lowered: lowering the second
        # argument may emit code that overwrites RET_VAR, which the first
        # operand's expression may still refer to.
        old_arg = _expr(args[0], ctx)
        tmp_p = ctx.fresh_tmp()
        ctx.emit(f'{tmp_p}={old_arg}')
        new_arg = _expr(args[1], ctx)
        tmp_r = ctx.fresh_tmp()
        ctx.emit(f'{tmp_r}={new_arg}')
        # Both operands are expanded with `$`; the replacement previously
        # lacked it and was emitted as the literal text "{name}".
        ctx.emit(f'{RET_VAR}="${{{recv_name}//"${{{tmp_p}}}"/"${{{tmp_r}}}"}}"')
        return result
    if method == 'contains':
        arg = _expr(args[0], ctx)
        ctx.emit(f'[[ "${{{recv_name}}}" == *{arg}* ]] && {RET_VAR}=true || {RET_VAR}=false')
        return result
    if method == 'starts':
        arg = _expr(args[0], ctx)
        ctx.emit(f'[[ "${{{recv_name}}}" == {arg}* ]] && {RET_VAR}=true || {RET_VAR}=false')
        return result
    if method == 'ends':
        arg = _expr(args[0], ctx)
        ctx.emit(f'[[ "${{{recv_name}}}" == *{arg} ]] && {RET_VAR}=true || {RET_VAR}=false')
        return result
    if method == 'substr':
        start_arg = _expr(args[0], ctx)
        if RET_VAR in start_arg:
            # Snapshot the start offset before the length argument is lowered,
            # since that may overwrite RET_VAR. The offset position is an
            # arithmetic context, so the bare temporary name is evaluated.
            tmp_s = ctx.fresh_tmp()
            ctx.emit(f'{tmp_s}={start_arg}')
            start_bare = tmp_s
        else:
            start_bare = _to_arith(start_arg)
        len_arg = _expr(args[1], ctx)
        len_bare = _to_arith(len_arg)
        if start_bare.startswith('-'):
            # "${var:-N...}" is the default-value expansion; parenthesise a
            # negative offset so bash treats it as a substring offset.
            start_bare = f'({start_bare})'
        ctx.emit(f'{RET_VAR}="${{{recv_name}:{start_bare}:{len_bare}}}"')
        return result
    if method == 'index':
        arg = _expr(args[0], ctx)
        tmp_p = ctx.fresh_tmp()
        tmp = ctx.fresh_tmp()
        ctx.emit(f'{tmp_p}={arg}')
        # Drop everything from the first occurrence of the needle onward; the
        # length of what remains is the needle's index.
        ctx.emit(f'{tmp}="${{{recv_name}%%"${{{tmp_p}}}"*}}"')
        # Nothing removed means the needle was not found.
        ctx.emit(f'[[ "${{{tmp}}}" == "${{{recv_name}}}" ]] && {RET_VAR}=-1 || {RET_VAR}=${{#{tmp}}}')
        return result
    return None
def _stdlib_method_expr(node: IRMethodCall, ctx: 'EmitContext') -> str:
method = node.method_name
recv = _expr(node.receiver, ctx)
......@@ -768,6 +842,12 @@ def _stdlib_method_expr(node: IRMethodCall, ctx: 'EmitContext') -> str:
if method == 'len' and (is_known_array or is_actual_array) and not is_field_recv:
return '"${#' + recv_name + '[@]}"'
# arr.get(idx) fast path — direct subscript access
if method == 'get' and (is_known_array or is_actual_array) and not is_field_recv and not is_param_array:
idx_expr = _expr(node.args[0], ctx)
idx_bare = idx_expr.strip('"').strip('${}')
return f'"${{{recv_name}[{idx_bare}]}}"'
args = _ct_args(node.args, ctx)
# Array path — field access uses recv directly (it expands to the array name)
......@@ -799,6 +879,12 @@ def _stdlib_method_expr(node: IRMethodCall, ctx: 'EmitContext') -> str:
ctx.emit(f'{fn} {call_args}')
return f'"${{{RET_VAR}}}"'
# String inlining fast path: only for simple variable receivers
if isinstance(node.receiver, IRIdentifier) and not is_known_array and not is_known_dict:
inlined = _try_inline_string_method(recv_name, method, node.args, ctx)
if inlined is not None:
return inlined
# String methods (and fallback)
prefix = _STDLIB_METHOD_PREFIX.get(method, STR_PREFIX)
fn = f'{prefix}{method}'
......@@ -852,6 +938,12 @@ def _stdlib_method_stmt(node: IRMethodCall, ctx: 'EmitContext') -> None:
if method == 'shift':
ctx.emit(f'{recv_name}=("${{{recv_name}[@]:1}}")')
return
if method == 'set':
idx_expr = _expr(node.args[0], ctx)
val_expr = _expr(node.args[1], ctx)
idx_bare = idx_expr.strip('"').strip('${}')
ctx.emit(f'{recv_name}[{idx_bare}]={val_expr}')
return
if (is_known_dict or is_actual_dict) and not is_field_recv:
if method == 'set':
......
......@@ -479,14 +479,46 @@ def _while(node: IRWhile, ctx: 'EmitContext') -> None:
ctx.emit('done')
def _emit_c_style_for(var: str, args: list, ctx: 'EmitContext') -> None:
    """Emit the opening ``for ...; do`` line for a loop over ``range(...)``.

    One to three arguments produce a bash arithmetic ``for ((...))`` loop,
    avoiding the ``$(__ct_range ...)`` command substitution. Any other
    argument count falls back to iterating over ``__ct_range`` output.

    Args:
        var: Name of the loop variable.
        args: IR argument nodes of the ``range`` call (end / start, end /
            start, end, step).
        ctx: Emit context used to output statements and allocate temporaries.

    With an explicit step the loop condition follows the step's sign: a
    positive step counts up while ``var < end``, a negative step counts down
    while ``var > end``, and a zero step produces no iterations.
    """
    import re
    from .expr import _to_arith
    from .constants import RET_VAR

    def _safe_bound(val: str) -> str:
        # A bound that mentions RET_VAR is copied into a fresh temporary so
        # code emitted afterwards that reassigns RET_VAR cannot alter it.
        if RET_VAR in val:
            tmp = ctx.fresh_tmp()
            ctx.emit(f'{tmp}={val}')
            return f'${{{tmp}}}'
        return val

    if not 1 <= len(args) <= 3:
        args_str = _ct_args(args, ctx)
        ctx.emit(f'for {var} in $(__ct_range {args_str}); do')
        return

    # Lower the arguments strictly left to right, protecting each bound
    # before the next argument is lowered.
    bounds = [_safe_bound(_to_arith(_expr(arg, ctx))) for arg in args]

    if len(bounds) == 1:
        (end,) = bounds
        ctx.emit(f'for (({var}=0; {var}<{end}; {var}++)); do')
        return
    if len(bounds) == 2:
        start, end = bounds
        ctx.emit(f'for (({var}={start}; {var}<{end}; {var}++)); do')
        return

    start, end, step = bounds
    literal = int(step) if re.fullmatch(r'-?[0-9]+', step) else None
    if literal is not None and literal > 0:
        cond = f'{var}<{end}'
    elif literal is not None and literal < 0:
        # Counting down: a "<" test would never be true.
        cond = f'{var}>{end}'
    else:
        # Step sign is only known at run time (or is zero): choose the
        # comparison from the sign, and iterate zero times for a zero step
        # rather than looping forever.
        cond = f'({step})>0 ? {var}<{end} : (({step})<0 && {var}>{end})'
    ctx.emit(f'for (({var}={start}; {cond}; {var}+={step})); do')
def _for(node: IRFor, ctx: 'EmitContext') -> None:
"""for i in range(...)"""
from ...ir.nodes import IRCall
from .expr import _BUILTIN_BASH
if isinstance(node.iterable, IRCall):
bn = _BUILTIN_BASH.get(node.iterable.callee_name, node.iterable.callee_name)
args = _ct_args(node.iterable.args, ctx)
ctx.emit(f'for {node.variable} in $({bn} {args}); do')
cname = node.iterable.callee_name
if cname in ('range', '__ct_range'):
_emit_c_style_for(node.variable, node.iterable.args, ctx)
else:
bn = _BUILTIN_BASH.get(cname, cname)
args = _ct_args(node.iterable.args, ctx)
ctx.emit(f'for {node.variable} in $({bn} {args}); do')
else:
iterable = _expr(node.iterable, ctx) if node.iterable else '"$@"'
ctx.emit(f'for {node.variable} in {iterable}; do')
......@@ -535,15 +567,18 @@ def _foreach(node: IRForeach, ctx: 'EmitContext') -> None:
ctx.emit('done')
return
# range() call → iterate over stdout sequence
# range() call → C-style for loop (no subshell)
from ...ir.nodes import IRCall as _IRCall
if isinstance(iterable, _IRCall):
bash_name = iterable.callee_name
from .expr import _BUILTIN_BASH
bash_name = _BUILTIN_BASH.get(bash_name, bash_name)
args = _ct_args(iterable.args, ctx)
cname = iterable.callee_name
var = vars_[0] if vars_ else '__ct_item'
ctx.emit(f'for {var} in $({bash_name} {args}); do')
if cname in ('range', '__ct_range'):
_emit_c_style_for(var, iterable.args, ctx)
else:
from .expr import _BUILTIN_BASH
bash_name = _BUILTIN_BASH.get(cname, cname)
args = _ct_args(iterable.args, ctx)
ctx.emit(f'for {var} in $({bash_name} {args}); do')
with ctx.indented():
if node.body:
emit_block(node.body, ctx)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment