Commit d68d4f71 authored by Roman Alifanov

Refactor: unified method registry and codegen cleanup

- Split methods.py into a methods/ directory with separate modules
- Add awk_builtin field to Method for unified AWK generation
- Replace hardcoded method dispatch in awk_codegen with generate_awk()
- Add RET_VAR/RET_ARR constants, replace hardcoded __CT_RET
- Migrate all codegen files to use the indented() context manager
parent c3bf25cc
......@@ -1021,24 +1021,29 @@ CodeGenerator
Вспомогательные модули:
├── constants.py # Константы (RET_VAR, TMP_PREFIX, CLASS_FUNC_PREFIX, etc.)
└── methods.py # Единый реестр методов для bash/awk синхронизации
└── methods/ # Единый реестр методов (bash_impl + awk_gen)
├── base.py # Method dataclass
├── string.py # StringMethods
├── array.py # ArrayMethods
├── dict.py # DictMethods
└── ... # http, fs, json, logger, math, time, etc.
```
### Добавление новых методов
Для добавления нового метода достаточно обновить `methods.py`:
Для добавления нового метода достаточно обновить соответствующий файл в `methods/`:
```python
STRING_METHODS = {
# methods/string.py
class StringMethods:
...
"new_method": MethodDef(
"new_method",
min_args=1,
max_args=1,
new_method = Method(
name="new_method",
bash_func="__ct_str_new_method",
awk_gen=lambda obj, args: f"awk_impl({obj}, {args[0]})"
),
}
bash_impl='__CT_RET="${1}..."; echo "$__CT_RET"',
awk_gen=lambda obj, args: f"awk_impl({obj}, {args[0]})",
min_args=1, max_args=1,
)
```
Bash и AWK codegen автоматически подхватят изменения.
......@@ -366,7 +366,12 @@ bootstrap/ # Bootstrap compiler (Python)
├── ast_nodes.py # AST node classes
├── errors.py # Error handling
├── constants.py # Codegen constants (RET_VAR, TMP_PREFIX, etc.)
├── methods.py # Unified method registry for bash/awk sync
├── methods/ # Unified method registry (bash + awk)
│ ├── base.py # Method dataclass
│ ├── string.py # String methods
│ ├── array.py # Array methods
│ ├── dict.py # Dict methods
│ └── ... # http, fs, json, logger, math, time, etc.
├── dce.py # Dead code elimination
├── codegen.py # Main Bash code generator (mixin coordinator)
├── expr_codegen.py # Expression generation (mixin)
......
......@@ -366,7 +366,12 @@ bootstrap/ # Bootstrap-компилятор (Python)
├── ast_nodes.py # Классы узлов AST
├── errors.py # Обработка ошибок
├── constants.py # Константы кодогенерации (RET_VAR, TMP_PREFIX, etc.)
├── methods.py # Единый реестр методов для bash/awk синхронизации
├── methods/ # Единый реестр методов (bash + awk)
│ ├── base.py # Method dataclass
│ ├── string.py # Строковые методы
│ ├── array.py # Методы массивов
│ ├── dict.py # Методы словарей
│ └── ... # http, fs, json, logger, math, time, etc.
├── dce.py # Устранение мёртвого кода
├── codegen.py # Основной генератор Bash-кода (координатор миксинов)
├── expr_codegen.py # Генерация выражений (миксин)
......
......@@ -8,33 +8,8 @@ from .ast_nodes import (
BoolLiteral, NilLiteral, ArrayLiteral, DictLiteral, BinaryOp,
UnaryOp, CallExpr, IndexAccess, MemberAccess
)
AWK_MATH_FUNCS = {
"sin": lambda a: f"sin({a[0]})",
"cos": lambda a: f"cos({a[0]})",
"sqrt": lambda a: f"sqrt({a[0]})",
"log": lambda a: f"log({a[0]})",
"exp": lambda a: f"exp({a[0]})",
"int": lambda a: f"int({a[0]})",
"rand": lambda a: "rand()",
"atan2": lambda a: f"atan2({a[0]}, {a[1]})",
}
AWK_BUILTIN_FUNCS = {
"print": lambda a: f"print {', '.join(a)}" if a else "print",
"printf": lambda a: f"printf {', '.join(a)}",
"sprintf": lambda a: f"sprintf({', '.join(a)})",
"length": lambda a: f"length({a[0]})" if a else "length()",
"substr": lambda a: f"substr({', '.join(a)})",
"split": lambda a: f"split({', '.join(a)})",
"sub": lambda a: f"sub({', '.join(a)})",
"gsub": lambda a: f"gsub({', '.join(a)})",
"match": lambda a: f"match({', '.join(a)})",
"tolower": lambda a: f"tolower({a[0]})" if a else "",
"toupper": lambda a: f"toupper({a[0]})" if a else "",
"int": lambda a: f"int({a[0]})" if a else "",
}
from .methods import get_awk_builtin, generate_awk, MATH_METHODS
from .constants import RET_VAR
class AwkCodegenMixin:
......@@ -93,8 +68,13 @@ class AwkCodegenMixin:
"""Generate a function that runs as inline AWK instead of Bash."""
name = func.name
self.emit (f"{name} () {{")
self.indent_level += 1
with self.indented():
self._generate_awk_function_body(func)
self.emit ("}")
self.emit ()
def _generate_awk_function_body (self, func: FunctionDecl):
"""Generate the body of an AWK function."""
validate_decorator = None
for dec in func.decorators:
if dec.name == "validate":
......@@ -154,7 +134,7 @@ class AwkCodegenMixin:
for stmt in after_stmts:
self._awk_stmt (stmt, end_emit, end_inc, end_dec)
self.emit (f"__CT_RET=$({awk_cmd} '")
self.emit (f"{RET_VAR}=$({awk_cmd} '")
for nf in nested_funcs:
self._awk_helper_func (nf)
......@@ -186,7 +166,7 @@ class AwkCodegenMixin:
for stmt in main_stmts:
self._awk_stmt (stmt, awk_emit, awk_inc, awk_dec)
self.emit (f"__CT_RET=$({awk_cmd} '")
self.emit (f"{RET_VAR}=$({awk_cmd} '")
for nf in nested_funcs:
self._awk_helper_func (nf)
......@@ -197,11 +177,8 @@ class AwkCodegenMixin:
self.emit ("}')")
self.emit ('local __awk_rc=$?')
self.emit ('echo "$__CT_RET"')
self.emit (f'echo "${{{RET_VAR}}}"')
self.emit ('return $__awk_rc')
self.indent_level -= 1
self.emit ("}")
self.emit ()
def _awk_helper_func (self, func: FunctionDecl):
"""Generate AWK helper function definition."""
......@@ -500,99 +477,27 @@ class AwkCodegenMixin:
method = expr.callee.member
args = expr.arguments
if ns == "math" and method in AWK_MATH_FUNCS:
awk_args = [self._awk_expr(a) for a in args]
return AWK_MATH_FUNCS[method](awk_args)
if ns == "math" and method in MATH_METHODS:
math_method = MATH_METHODS[method]
if math_method.awk_builtin:
awk_args = [self._awk_expr(a) for a in args]
return math_method.awk_builtin(awk_args)
var_types = getattr (self, '_awk_var_types', {})
var_type = var_types.get (ns, "string")
if var_type == "array":
if method == "len":
return f"length({ns})"
if method == "push" and len (args) >= 1:
val = self._awk_expr (args[0])
return f"{ns}[length({ns}) + 1] = {val}"
if method == "pop":
return f"delete {ns}[length({ns})]"
if method == "shift":
return f"delete {ns}[1]"
if method == "get" and len (args) >= 1:
idx = self._awk_expr (args[0])
return f"{ns}[{idx}]"
if method == "set" and len (args) >= 2:
idx = self._awk_expr (args[0])
val = self._awk_expr (args[1])
return f"{ns}[{idx}] = {val}"
if method == "has" and len (args) >= 1:
key = self._awk_expr (args[0])
return f"({key} in {ns})"
if method == "del" and len (args) >= 1:
key = self._awk_expr (args[0])
return f"delete {ns}[{key}]"
if method == "join" and len(args) >= 1:
sep = self._awk_expr(args[0])
return f"__ct_awk_join({ns}, {sep})"
elif var_type == "dict":
if method == "get" and len (args) >= 1:
key = self._awk_expr (args[0])
return f"{ns}[{key}]"
if method == "set" and len (args) >= 2:
key = self._awk_expr (args[0])
val = self._awk_expr (args[1])
return f"{ns}[{key}] = {val}"
if method == "has" and len (args) >= 1:
key = self._awk_expr (args[0])
return f"({key} in {ns})"
if method == "del" and len (args) >= 1:
key = self._awk_expr (args[0])
return f"delete {ns}[{key}]"
if method == "keys":
return f"{ns}"
else:
if method == "len":
return f"length({ns})"
if method == "upper":
return f"toupper({ns})"
if method == "lower":
return f"tolower({ns})"
if method == "contains" and len (args) >= 1:
needle = self._awk_expr (args[0])
return f"(index({ns}, {needle}) > 0)"
if method == "index" and len (args) >= 1:
needle = self._awk_expr (args[0])
return f"(index({ns}, {needle}) - 1)"
if method == "substr" and len (args) >= 2:
start = self._awk_expr (args[0])
length = self._awk_expr (args[1])
return f"substr({ns}, {start} + 1, {length})"
if method == "charAt" and len (args) >= 1:
pos = self._awk_expr (args[0])
return f"substr({ns}, {pos} + 1, 1)"
if method == "trim":
return f"(gsub(/^[ \\t]+|[ \\t]+$/, \"\", {ns}) ? {ns} : {ns})"
if method == "replace" and len (args) >= 2:
old = self._awk_expr (args[0])
new = self._awk_expr (args[1])
return f"(gsub({old}, {new}, {ns}) ? {ns} : {ns})"
if method == "split" and len (args) >= 1:
delim = self._awk_expr (args[0])
return f"split({ns}, __split_arr, {delim})"
if method == "starts" and len (args) >= 1:
prefix = self._awk_expr (args[0])
return f"(substr({ns}, 1, length({prefix})) == {prefix})"
if method == "ends" and len (args) >= 1:
suffix = self._awk_expr (args[0])
return f"(substr({ns}, length({ns}) - length({suffix}) + 1) == {suffix})"
type_name = {"array": "array", "dict": "dict"}.get(var_type, "string")
awk_args = [self._awk_expr(a) for a in args]
awk_code = generate_awk(type_name, method, ns, awk_args)
if awk_code:
return awk_code
if isinstance (expr.callee, Identifier):
func_name = expr.callee.name
args = [self._awk_expr(a) for a in expr.arguments]
if func_name in AWK_BUILTIN_FUNCS:
return AWK_BUILTIN_FUNCS[func_name](args)
awk_code = get_awk_builtin(func_name, args)
if awk_code:
return awk_code
return f"{func_name}({', '.join(args)})"
......
......@@ -189,31 +189,27 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin,
self.emit('echo')
self.emit('__ct_run_tests () {')
self.indent_level += 1
self.emit('local __test_failed=0')
for func_name, description in self.test_functions:
escaped_desc = description.replace('"', '\\"')
self.emit(f'__ct_test_start "{escaped_desc}"')
self.emit('local __prev_failed=$__ct_test_failed')
self.emit(f'if {func_name}; then')
self.indent_level += 1
self.emit('__ct_test_pass')
self.indent_level -= 1
self.emit('else')
self.indent_level += 1
self.emit('if [[ $__ct_test_failed -eq $__prev_failed ]]; then')
self.indent_level += 1
self.emit('__ct_test_fail ""')
self.indent_level -= 1
self.emit('fi')
self.emit('__test_failed=1')
self.indent_level -= 1
self.emit('fi')
self.emit()
self.emit('__ct_test_summary')
self.indent_level -= 1
with self.indented():
self.emit('local __test_failed=0')
for func_name, description in self.test_functions:
escaped_desc = description.replace('"', '\\"')
self.emit(f'__ct_test_start "{escaped_desc}"')
self.emit('local __prev_failed=$__ct_test_failed')
self.emit(f'if {func_name}; then')
with self.indented():
self.emit('__ct_test_pass')
self.emit('else')
with self.indented():
self.emit('if [[ $__ct_test_failed -eq $__prev_failed ]]; then')
with self.indented():
self.emit('__ct_test_fail ""')
self.emit('fi')
self.emit('__test_failed=1')
self.emit('fi')
self.emit()
self.emit('__ct_test_summary')
self.emit('}')
self.emit()
self.emit('__ct_run_tests')
"""Constants for bash code generation."""
# Name of the bash variable that generated functions assign their scalar
# result to (the code generators emit `{RET_VAR}=...` / `"${RET_VAR}"`).
RET_VAR = "__CT_RET"
# Name of the bash array variable written by array-returning builtins
# (e.g. the slice/keys implementations assign `__CT_RET_ARR=(...)`).
RET_ARR = "__CT_RET_ARR"
# Prefix for generated temporary variable names -- presumably consumed by
# the temp-name allocator; TODO confirm against the call site.
TMP_PREFIX = "__ct_tmp_"
# Prefix of the bash functions generated for class methods
# (calls are emitted as `__ct_class_<Class>_<method> "$this" ...`).
CLASS_FUNC_PREFIX = "__ct_class_"
# Prefix for generated lambda function names -- presumably; TODO confirm.
LAMBDA_PREFIX = "__ct_lambda_"
......
......@@ -3,6 +3,7 @@ from .ast_nodes import (
Expression, CallExpr, MemberAccess, ThisExpr, Identifier,
BinaryOp, UnaryOp, BoolLiteral
)
from .constants import RET_VAR
class NodeIdMap:
......@@ -76,7 +77,7 @@ class CseMixin:
if key not in seen:
temp = self.new_temp()
call_line = f'__ct_class_{self.current_class}_{method} "$this" {args_str} >/dev/null'
assign_line = f'{temp}="$__CT_RET"'
assign_line = f'{temp}="${{{RET_VAR}}}"'
self.emit(call_line)
self.emit(assign_line)
seen[key] = temp
......@@ -105,7 +106,7 @@ class CseMixin:
if key not in seen:
temp = self.new_temp()
call_line = f'__ct_class_{self.current_class}_{method} "$this" {args_str} >/dev/null'
assign_line = f'{temp}="$__CT_RET"'
assign_line = f'{temp}="${{{RET_VAR}}}"'
self.emit(call_line)
self.emit(assign_line)
seen[key] = temp
......@@ -140,7 +141,7 @@ class CseMixin:
if key not in seen:
temp = self.new_temp()
call_line = f'{func_name} {args_str} >/dev/null'
assign_line = f'{temp}="$__CT_RET"'
assign_line = f'{temp}="${{{RET_VAR}}}"'
self.emit(call_line)
self.emit(assign_line)
seen[key] = temp
......
......@@ -593,23 +593,20 @@ class ExprMixin:
def generate_lambda_as_function(self, expr: Lambda, name: str):
"""Generate lambda as a named function."""
self.emit(f"{name} () {{")
self.indent_level += 1
with self.indented():
for i, param in enumerate(expr.params):
self.emit(f'local {param}="${{{i + 1}}}"')
for i, param in enumerate(expr.params):
self.emit(f'local {param}="${{{i + 1}}}"')
if isinstance(expr.body, Block):
for stmt in expr.body.statements:
self.generate_statement(stmt)
else:
if self._is_boolean_expr(expr.body):
cond = self.generate_condition(expr.body)
self.emit(f'{cond} && echo "true" || echo "false"')
if isinstance(expr.body, Block):
for stmt in expr.body.statements:
self.generate_statement(stmt)
else:
result = self.generate_expr(expr.body)
self.emit(f'echo "{result}"')
self.indent_level -= 1
if self._is_boolean_expr(expr.body):
cond = self.generate_condition(expr.body)
self.emit(f'{cond} && echo "true" || echo "false"')
else:
result = self.generate_expr(expr.body)
self.emit(f'echo "{result}"')
self.emit("}")
self.emit()
......
"""Unified method registry for bash and AWK code generation.
This module provides a single source of truth for all builtin methods,
ensuring consistency between bash and AWK code generators.
"""
from dataclasses import dataclass
from typing import Optional, Callable, List
@dataclass
class MethodDef:
    """Registry record describing one builtin method.

    The same record serves both back ends: ``bash_func`` names the bash
    helper, while ``awk_gen`` (when present) renders the AWK expression.
    """

    # Method name as written in source; also used as the registry key.
    name: str
    # Inclusive arity bounds; ``max_args=None`` leaves the upper bound open.
    min_args: int = 0
    max_args: Optional[int] = None
    # Name of the bash helper function, or None when there is none.
    bash_func: Optional[str] = None
    # Callable ``(obj, args) -> str`` that renders AWK code; None means the
    # method has no AWK translation.
    awk_gen: Optional[Callable[[str, List[str]], str]] = None
    # Flag marking methods whose result is an array rather than a scalar.
    returns_array: bool = False
# Methods callable on string values.
# Positional MethodDef arguments are: name, min_args, max_args, bash_func,
# awk_gen. Each awk_gen lambda receives the receiver expression (`obj`) and
# the already-rendered argument expressions (`args`) and returns AWK source.
STRING_METHODS = {
"len": MethodDef("len", 0, 0, "__ct_str_len",
lambda obj, args: f"length({obj})"),
"upper": MethodDef("upper", 0, 0, "__ct_str_upper",
lambda obj, args: f"toupper({obj})"),
"lower": MethodDef("lower", 0, 0, "__ct_str_lower",
lambda obj, args: f"tolower({obj})"),
# gsub() edits `obj` in place; the ternary only exists to yield `obj`
# as the expression value on either branch.
"trim": MethodDef("trim", 0, 0, "__ct_str_trim",
lambda obj, args: f'(gsub(/^[ \\t]+|[ \\t]+$/, "", {obj}) ? {obj} : {obj})'),
"contains": MethodDef("contains", 1, 1, "__ct_str_contains",
lambda obj, args: f"(index({obj}, {args[0]}) > 0)"),
"starts": MethodDef("starts", 1, 1, "__ct_str_starts",
lambda obj, args: f"(substr({obj}, 1, length({args[0]})) == {args[0]})"),
"ends": MethodDef("ends", 1, 1, "__ct_str_ends",
lambda obj, args: f"(substr({obj}, length({obj}) - length({args[0]}) + 1) == {args[0]})"),
# AWK index() is 1-based; subtracting 1 yields a 0-based position
# (and -1 when the needle is absent).
"index": MethodDef("index", 1, 1, "__ct_str_index",
lambda obj, args: f"(index({obj}, {args[0]}) - 1)"),
"replace": MethodDef("replace", 2, 2, "__ct_str_replace",
lambda obj, args: f"(gsub({args[0]}, {args[1]}, {obj}) ? {obj} : {obj})"),
# The `+ 1` converts a 0-based start offset to AWK's 1-based substr().
"substr": MethodDef("substr", 2, 2, "__ct_str_substr",
lambda obj, args: f"substr({obj}, {args[0]} + 1, {args[1]})"),
# The AWK form always splits into the fixed array name `__split_arr`.
"split": MethodDef("split", 1, 1, "__ct_str_split",
lambda obj, args: f"split({obj}, __split_arr, {args[0]})",
returns_array=True),
"charAt": MethodDef("charAt", 1, 1, "__ct_str_char_at",
lambda obj, args: f"substr({obj}, {args[0]} + 1, 1)"),
# No AWK generator: generate_awk() returns None for this method.
"urlencode": MethodDef("urlencode", 0, 0, "__ct_str_urlencode", None),
}
# Methods callable on array values. The AWK forms treat arrays as 1-based
# (push appends at length + 1, pop deletes the element at index length).
ARRAY_METHODS = {
"len": MethodDef("len", 0, 0, "__ct_arr_len",
lambda obj, args: f"length({obj})"),
"push": MethodDef("push", 1, 1, "__ct_arr_push",
lambda obj, args: f"{obj}[length({obj}) + 1] = {args[0]}"),
"pop": MethodDef("pop", 0, 0, "__ct_arr_pop",
lambda obj, args: f"delete {obj}[length({obj})]"),
# NOTE(review): the AWK form only deletes index 1; it does not renumber
# the remaining elements -- confirm this is the intended semantics.
"shift": MethodDef("shift", 0, 0, "__ct_arr_shift",
lambda obj, args: f"delete {obj}[1]"),
"join": MethodDef("join", 1, 1, "__ct_arr_join",
lambda obj, args: f"__ct_awk_join({obj}, {args[0]})"),
"get": MethodDef("get", 1, 1, "__ct_arr_get",
lambda obj, args: f"{obj}[{args[0]}]"),
"set": MethodDef("set", 2, 2, "__ct_arr_set",
lambda obj, args: f"{obj}[{args[0]}] = {args[1]}"),
# Bash-only methods (awk_gen is None).
"slice": MethodDef("slice", 2, 2, "__ct_arr_slice", None, returns_array=True),
"map": MethodDef("map", 1, 1, "__ct_arr_map", None, returns_array=True),
"filter": MethodDef("filter", 1, 1, "__ct_arr_filter", None, returns_array=True),
}
# Methods callable on dict (associative array) values.
DICT_METHODS = {
"get": MethodDef("get", 1, 1, "__ct_dict_get",
lambda obj, args: f"{obj}[{args[0]}]"),
"set": MethodDef("set", 2, 2, "__ct_dict_set",
lambda obj, args: f"{obj}[{args[0]}] = {args[1]}"),
"has": MethodDef("has", 1, 1, "__ct_dict_has",
lambda obj, args: f"({args[0]} in {obj})"),
"del": MethodDef("del", 1, 1, "__ct_dict_del",
lambda obj, args: f"delete {obj}[{args[0]}]"),
"keys": MethodDef("keys", 0, 0, "__ct_dict_keys", None, returns_array=True),
}
# Methods callable on file handles; none of them has an AWK translation.
FILE_HANDLE_METHODS = {
"read": MethodDef("read", 0, 0, "__ct_fh_read", None),
"readline": MethodDef("readline", 0, 0, "__ct_fh_readline", None),
"write": MethodDef("write", 1, 1, "__ct_fh_write", None),
"writeln": MethodDef("writeln", 1, 1, "__ct_fh_writeln", None),
"close": MethodDef("close", 0, 0, "__ct_fh_close", None),
}
# Namespace name -> set of function names available under that namespace.
NAMESPACE_METHODS = {
"fs": {"read", "write", "append", "exists", "remove", "mkdir", "list", "open"},
"http": {"get", "post", "put", "delete"},
"json": {"parse", "stringify", "get"},
"logger": {"info", "warn", "error", "debug"},
"regex": {"match", "extract"},
"args": {"count", "get"},
"shell": {"exec", "capture", "source"},
"time": {"now", "ms"},
"math": {"add", "sub", "mul", "div", "mod", "min", "max", "abs"},
}
# Set of all namespace names, derived from the table above.
BUILTIN_NAMESPACES = set(NAMESPACE_METHODS.keys())
# Names of the free-standing builtin functions.
BUILTIN_FUNCS = {"print", "exit", "len", "range", "ngrep", "is_number",
"is_empty", "chr", "ord", "assert", "assert_eq", "random", "random_range"}
def get_method_names(type_name: str) -> set:
    """Return the set of method names registered for ``type_name``.

    Recognised type names are ``"string"``, ``"array"``, ``"dict"`` and
    ``"file_handle"``; any other value produces an empty set.
    """
    tables = {
        "string": STRING_METHODS,
        "array": ARRAY_METHODS,
        "dict": DICT_METHODS,
        "file_handle": FILE_HANDLE_METHODS,
    }
    table = tables.get(type_name)
    if table is None:
        return set()
    return set(table.keys())
def get_method_def(type_name: str, method_name: str) -> Optional[MethodDef]:
    """Look up the definition of ``method_name`` on values of ``type_name``.

    Returns None when either the type or the method is not registered.
    """
    if type_name == "string":
        table = STRING_METHODS
    elif type_name == "array":
        table = ARRAY_METHODS
    elif type_name == "dict":
        table = DICT_METHODS
    elif type_name == "file_handle":
        table = FILE_HANDLE_METHODS
    else:
        return None
    return table.get(method_name)
def get_bash_func(type_name: str, method_name: str) -> Optional[str]:
    """Return the bash helper name for a method, or None if it is unknown."""
    definition = get_method_def(type_name, method_name)
    if not definition:
        return None
    return definition.bash_func
def generate_awk(type_name: str, method_name: str, obj: str, args: List[str]) -> Optional[str]:
    """Render the AWK code for calling ``method_name`` on ``obj``.

    ``args`` holds the already-rendered AWK argument expressions. Returns
    None when the method is unknown or has no AWK generator.
    """
    definition = get_method_def(type_name, method_name)
    if not definition or not definition.awk_gen:
        return None
    return definition.awk_gen(obj, args)
from .base import Method, collect_methods
from .string import StringMethods
from .array import ArrayMethods
from .dict import DictMethods
from .file_handle import FileHandleMethods
from .http import HttpMethods
from .fs import FsMethods
from .json import JsonMethods
from .logger import LoggerMethods
from .regex import RegexMethods
from .math import MathMethods
from .time import TimeMethods
from .args import ArgsMethods
from .core import CoreFunctions, AwkBuiltinFunctions
# Per-type method tables: each is the dict produced by collect_methods()
# from the Method attributes declared on the corresponding class.
STRING_METHODS = collect_methods(StringMethods)
ARRAY_METHODS = collect_methods(ArrayMethods)
DICT_METHODS = collect_methods(DictMethods)
FILE_HANDLE_METHODS = collect_methods(FileHandleMethods)
# Per-namespace function tables, built the same way.
HTTP_METHODS = collect_methods(HttpMethods)
FS_METHODS = collect_methods(FsMethods)
JSON_METHODS = collect_methods(JsonMethods)
LOGGER_METHODS = collect_methods(LoggerMethods)
REGEX_METHODS = collect_methods(RegexMethods)
MATH_METHODS = collect_methods(MathMethods)
TIME_METHODS = collect_methods(TimeMethods)
ARGS_METHODS = collect_methods(ArgsMethods)
# Free-standing builtins, plus functions that exist only in AWK context.
CORE_FUNCTIONS = collect_methods(CoreFunctions)
AWK_BUILTIN_FUNCTIONS = collect_methods(AwkBuiltinFunctions)
# Namespace name -> table of its functions. "shell" is the one entry that
# is a plain set of names instead of a dict of Method objects.
NAMESPACE_REGISTRY = {
"fs": FS_METHODS,
"http": HTTP_METHODS,
"json": JSON_METHODS,
"logger": LOGGER_METHODS,
"regex": REGEX_METHODS,
"args": ARGS_METHODS,
"time": TIME_METHODS,
"math": MATH_METHODS,
"shell": {"exec", "capture", "source"},
}
# Namespace name -> set of function names. The hasattr() check lets the
# set-valued "shell" entry pass through unchanged.
NAMESPACE_METHODS = {ns: set(methods.keys()) if hasattr(methods, 'keys') else methods
for ns, methods in NAMESPACE_REGISTRY.items()}
BUILTIN_NAMESPACES = set(NAMESPACE_METHODS.keys())
# Core function names plus four builtins that are listed here by name only.
BUILTIN_FUNCS = set(CORE_FUNCTIONS.keys()) | {"chr", "ord", "assert", "assert_eq"}
def get_method(type_name: str, method_name: str):
    """Return the registered method for ``type_name``/``method_name``.

    Looks in the string, array, dict and file-handle tables; returns None
    when the type or the method is not registered.
    """
    by_type = {
        "string": STRING_METHODS,
        "array": ARRAY_METHODS,
        "dict": DICT_METHODS,
        "file_handle": FILE_HANDLE_METHODS,
    }
    if type_name not in by_type:
        return None
    return by_type[type_name].get(method_name)
def get_method_names(type_name: str) -> set:
    """Return the names of all methods registered for ``type_name``.

    An unrecognised type name yields an empty set.
    """
    if type_name == "string":
        table = STRING_METHODS
    elif type_name == "array":
        table = ARRAY_METHODS
    elif type_name == "dict":
        table = DICT_METHODS
    elif type_name == "file_handle":
        table = FILE_HANDLE_METHODS
    else:
        table = {}
    return set(table.keys())
def get_bash_func(type_name: str, method_name: str):
    """Return the bash function name for a method, or None if not found."""
    found = get_method(type_name, method_name)
    if not found:
        return None
    return found.bash_func
def generate_awk(type_name: str, method_name: str, obj: str, args: list):
    """Render AWK code for calling ``method_name`` on receiver ``obj``.

    ``args`` is the list of already-rendered AWK argument expressions.
    Returns None when the method is unknown or defines no ``awk_gen``.
    """
    found = get_method(type_name, method_name)
    if not found or not found.awk_gen:
        return None
    return found.awk_gen(obj, args)
def get_awk_builtin(func_name: str, args: list):
    """Render AWK code for a free-standing builtin function call.

    The core function table is consulted first, then the AWK-only table.
    Returns None when no entry is found or the entry has no
    ``awk_builtin`` renderer.
    """
    entry = CORE_FUNCTIONS.get(func_name)
    if not entry:
        entry = AWK_BUILTIN_FUNCTIONS.get(func_name)
    if not entry or not entry.awk_builtin:
        return None
    return entry.awk_builtin(args)
from .base import Method
class ArgsMethods:
    """Functions of the ``args`` namespace, backed by the ``__ct_args`` bash array."""

    # Number of elements in __ct_args.
    count = Method(
        name="count",
        bash_func="__ct_args_count",
        bash_impl='echo ${#__ct_args[@]}',
    )
    # Element of __ct_args at the index given as the first argument.
    get = Method(
        name="get",
        bash_func="__ct_args_get",
        min_args=1,
        max_args=1,
        bash_impl="printf '%s\\n' \"${__ct_args[$1]}\"",
    )
from .base import Method
class ArrayMethods:
    """Methods available on array values.

    Each bash implementation receives the array *name* as ``$1`` and binds
    it with ``local -n``. Scalar results are stored in ``__CT_RET`` and
    echoed; array results are stored in ``__CT_RET_ARR``. Entries without
    ``awk_gen`` have no AWK translation.
    """

    len = Method(
        name="len",
        bash_func="__ct_arr_len",
        awk_gen=lambda obj, args: f"length({obj})",
        bash_impl='local -n __a=$1; __CT_RET=${#__a[@]}; echo "$__CT_RET"',
    )
    # Bash appends every remaining argument; AWK appends at length + 1.
    push = Method(
        name="push",
        bash_func="__ct_arr_push",
        min_args=1,
        max_args=1,
        awk_gen=lambda obj, args: f"{obj}[length({obj}) + 1] = {args[0]}",
        bash_impl='local -n __a=$1; shift; __a+=("$@")',
    )
    pop = Method(
        name="pop",
        bash_func="__ct_arr_pop",
        awk_gen=lambda obj, args: f"delete {obj}[length({obj})]",
        bash_impl="local -n __a=$1; unset '__a[-1]'",
    )
    # Bash returns the first element and re-packs the rest; the AWK form
    # only deletes index 1.
    shift = Method(
        name="shift",
        bash_func="__ct_arr_shift",
        awk_gen=lambda obj, args: f"delete {obj}[1]",
        bash_impl='local -n __a=$1; __CT_RET="${__a[0]}"; __a=("${__a[@]:1}"); echo "$__CT_RET"',
    )
    # The separator goes through printf %b so escapes such as \n are expanded.
    join = Method(
        name="join",
        bash_func="__ct_arr_join",
        min_args=1,
        max_args=1,
        awk_gen=lambda obj, args: f"__ct_awk_join({obj}, {args[0]})",
        bash_impl='local -n __a=$1; local sep; printf -v sep \'%b\' "$2"; local IFS="$sep"; __CT_RET="${__a[*]}"; echo "$__CT_RET"',
    )
    get = Method(
        name="get",
        bash_func="__ct_arr_get",
        min_args=1,
        max_args=1,
        awk_gen=lambda obj, args: f"{obj}[{args[0]}]",
        bash_impl='local -n __a=$1; __CT_RET="${__a[$2]}"; echo "$__CT_RET"',
    )
    set = Method(
        name="set",
        bash_func="__ct_arr_set",
        min_args=2,
        max_args=2,
        awk_gen=lambda obj, args: f"{obj}[{args[0]}] = {args[1]}",
        bash_impl='local -n __a=$1; __a[$2]="$3"',
    )
    slice = Method(
        name="slice",
        bash_func="__ct_arr_slice",
        min_args=2,
        max_args=2,
        returns_array=True,
        bash_impl='local -n __a=$1; __CT_RET_ARR=("${__a[@]:$2:$3}")',
    )
    # map/filter carry no inline bash body (bash_impl=None); presumably the
    # implementation is emitted elsewhere -- TODO confirm.
    map = Method(
        name="map",
        bash_func="__ct_arr_map",
        min_args=1,
        max_args=1,
        returns_array=True,
        bash_impl=None,
    )
    filter = Method(
        name="filter",
        bash_func="__ct_arr_filter",
        min_args=1,
        max_args=1,
        returns_array=True,
        bash_impl=None,
    )
from dataclasses import dataclass
from typing import Optional, Callable, List
@dataclass
class Method:
    """Description of one builtin method or function for both back ends.

    Attributes:
        name: Name under which the method is called in source code. This is
            the registry key; it may differ from the Python attribute that
            holds the instance (e.g. ``delete`` -> ``"del"``, ``int_`` ->
            ``"int"``) because some names are Python keywords or builtins.
        bash_func: Name of the bash helper function ("" when AWK-only).
        bash_impl: Inline bash body for the helper, or None/"" when no
            inline body is supplied.
        awk_gen: ``(obj, args) -> str`` renderer for a method call on a
            receiver in AWK, or None when there is no AWK translation.
        awk_builtin: ``(args) -> str`` renderer for a free-standing
            function call in AWK, or None.
        min_args: Minimum number of arguments.
        max_args: Maximum number of arguments; None leaves it unbounded.
        returns_array: True when the result is an array, not a scalar.
    """

    name: str
    bash_func: str
    # Optional because several registry entries pass bash_impl=None.
    bash_impl: Optional[str] = ""
    awk_gen: Optional[Callable[[str, List[str]], str]] = None
    awk_builtin: Optional[Callable[[List[str]], str]] = None
    min_args: int = 0
    max_args: Optional[int] = None
    returns_array: bool = False


def collect_methods(cls) -> dict:
    """Build a ``{method name: Method}`` registry from a declaration class.

    Every attribute of ``cls`` (including inherited ones) that is a
    ``Method`` instance is collected. Entries are keyed by ``Method.name``
    rather than by the Python attribute name, so that attributes which had
    to be spelled differently (``delete`` for ``"del"``, ``int_`` for
    ``"int"``) are still found under the name used in source code.
    """
    registry = {}
    for attr_name in dir(cls):
        candidate = getattr(cls, attr_name)
        if isinstance(candidate, Method):
            registry[candidate.name] = candidate
    return registry
from .base import Method
class CoreFunctions:
    """Free-standing builtin functions.

    ``bash_impl`` is the bash body of the helper; ``awk_builtin`` (when
    present) renders the equivalent AWK code from the rendered argument
    list. Entries without ``awk_builtin`` have no AWK form.
    """

    # Bash writes to file descriptor 3; AWK uses its own print statement.
    print = Method(
        name="print",
        bash_func="__ct_print",
        min_args=1,
        max_args=1,
        awk_builtin=lambda a: f"print {', '.join(a)}" if a else "print",
        bash_impl='local msg="$1"; echo -e "$msg" >&3',
    )
    exit = Method(
        name="exit",
        bash_func="__ct_exit",
        max_args=1,
        awk_builtin=lambda a: f"exit {a[0]}" if a else "exit",
        bash_impl='exit "${1:-0}"',
    )
    len = Method(
        name="len",
        bash_func="__ct_len",
        min_args=1,
        max_args=1,
        awk_builtin=lambda a: f"length({a[0]})" if a else "length()",
        bash_impl='local -n arr=$1; echo "${#arr[@]}"',
    )
    # No inline bash body and no AWK form.
    range = Method(
        name="range",
        bash_func="__ct_range",
        min_args=1,
        max_args=3,
        bash_impl=None,
    )
    # Both forms accept only optionally-signed integers.
    is_number = Method(
        name="is_number",
        bash_func="__ct_is_number",
        min_args=1,
        max_args=1,
        awk_builtin=lambda a: f"({a[0]} ~ /^-?[0-9]+$/)",
        bash_impl='[[ "$1" =~ ^-?[0-9]+$ ]] && echo true || echo false',
    )
    is_empty = Method(
        name="is_empty",
        bash_func="__ct_is_empty",
        min_args=1,
        max_args=1,
        awk_builtin=lambda a: f"(length({a[0]}) == 0)",
        bash_impl='[[ -z "$1" ]] && echo true || echo false',
    )
    # `|| true` keeps a no-match grep from producing a failing exit status.
    ngrep = Method(
        name="ngrep",
        bash_func="__ct_ngrep",
        min_args=2,
        max_args=2,
        bash_impl='echo "$2" | grep -n "$1" || true',
    )
    random = Method(
        name="random",
        bash_func="__ct_random",
        awk_builtin=lambda a: "int(rand() * 32768)",
        bash_impl='echo $RANDOM',
    )
    random_range = Method(
        name="random_range",
        bash_func="__ct_random_range",
        min_args=2,
        max_args=2,
        awk_builtin=lambda a: f"int({a[0]} + rand() * ({a[1]} - {a[0]} + 1))",
        bash_impl='echo $(($1 + RANDOM % ($2 - $1 + 1)))',
    )
class AwkBuiltinFunctions:
    """AWK-native functions that have no bash counterpart.

    Every entry has an empty ``bash_func``; ``awk_builtin`` renders the call
    by passing the rendered arguments straight through to the AWK function
    of the same name.
    """

    printf = Method(
        name="printf",
        bash_func="",
        min_args=1,
        awk_builtin=lambda a: f"printf {', '.join(a)}",
    )
    sprintf = Method(
        name="sprintf",
        bash_func="",
        min_args=1,
        awk_builtin=lambda a: f"sprintf({', '.join(a)})",
    )
    substr = Method(
        name="substr",
        bash_func="",
        min_args=2,
        max_args=3,
        awk_builtin=lambda a: f"substr({', '.join(a)})",
    )
    split = Method(
        name="split",
        bash_func="",
        min_args=2,
        max_args=3,
        awk_builtin=lambda a: f"split({', '.join(a)})",
    )
    sub = Method(
        name="sub",
        bash_func="",
        min_args=2,
        max_args=3,
        awk_builtin=lambda a: f"sub({', '.join(a)})",
    )
    gsub = Method(
        name="gsub",
        bash_func="",
        min_args=2,
        max_args=3,
        awk_builtin=lambda a: f"gsub({', '.join(a)})",
    )
    match = Method(
        name="match",
        bash_func="",
        min_args=2,
        awk_builtin=lambda a: f"match({', '.join(a)})",
    )
    # The three single-argument renderers below return "" when called
    # without arguments.
    tolower = Method(
        name="tolower",
        bash_func="",
        min_args=1,
        max_args=1,
        awk_builtin=lambda a: f"tolower({a[0]})" if a else "",
    )
    toupper = Method(
        name="toupper",
        bash_func="",
        min_args=1,
        max_args=1,
        awk_builtin=lambda a: f"toupper({a[0]})" if a else "",
    )
    # The attribute is spelled int_ while the Method itself is named "int".
    int_ = Method(
        name="int",
        bash_func="",
        min_args=1,
        max_args=1,
        awk_builtin=lambda a: f"int({a[0]})" if a else "",
    )
from .base import Method
class DictMethods:
    """Methods available on dict (associative array) values.

    Each bash implementation receives the dict *name* as ``$1`` and binds it
    with ``local -n``. Scalar results go to ``__CT_RET`` and are echoed;
    ``keys`` stores its result in ``__CT_RET_ARR``.
    """

    get = Method(
        name="get",
        bash_func="__ct_dict_get",
        min_args=1,
        max_args=1,
        awk_gen=lambda obj, args: f"{obj}[{args[0]}]",
        bash_impl='local -n __d="$1"; __CT_RET="${__d[$2]}"; echo "$__CT_RET"',
    )
    set = Method(
        name="set",
        bash_func="__ct_dict_set",
        min_args=2,
        max_args=2,
        awk_gen=lambda obj, args: f"{obj}[{args[0]}] = {args[1]}",
        bash_impl='local -n __d="$1"; __d["$2"]="$3"',
    )
    has = Method(
        name="has",
        bash_func="__ct_dict_has",
        min_args=1,
        max_args=1,
        awk_gen=lambda obj, args: f"({args[0]} in {obj})",
        bash_impl='local -n __d="$1"; [[ -v "__d[$2]" ]] && __CT_RET=true || __CT_RET=false; echo "$__CT_RET"',
    )
    # The attribute is spelled `delete` while the Method itself is named "del".
    delete = Method(
        name="del",
        bash_func="__ct_dict_del",
        min_args=1,
        max_args=1,
        awk_gen=lambda obj, args: f"delete {obj}[{args[0]}]",
        bash_impl='local -n __d="$1"; unset "__d[$2]"',
    )
    # No AWK translation is defined for keys.
    keys = Method(
        name="keys",
        bash_func="__ct_dict_keys",
        returns_array=True,
        bash_impl='local -n __d="$1"; __CT_RET_ARR=("${!__d[@]}")',
    )
    len = Method(
        name="len",
        bash_func="__ct_dict_len",
        awk_gen=lambda obj, args: f"length({obj})",
        bash_impl='local -n __d="$1"; __CT_RET=${#__d[@]}; echo "$__CT_RET"',
    )
from .base import Method
class FileHandleMethods:
    """Methods available on file handles.

    A handle ``h`` is a key prefix into the ``__ct_file_handles`` bash
    associative array: ``${h}_path`` holds the file path, ``${h}_mode`` the
    open mode and ``${h}_pos`` the next line number for ``readline``.
    None of these methods has an AWK translation.
    """

    # Whole-file read via cat.
    read = Method(
        name="read",
        bash_func="__ct_fh_read",
        bash_impl='local h="$1"; local path="${__ct_file_handles[${h}_path]}"; __CT_RET=$(cat "$path"); echo "$__CT_RET"',
    )
    # Reads line number `pos` (defaulting to 1) with sed, then advances pos.
    readline = Method(
        name="readline",
        bash_func="__ct_fh_readline",
        bash_impl='local h="$1"; local path="${__ct_file_handles[${h}_path]}"; local pos="${__ct_file_handles[${h}_pos]:-1}"; __CT_RET=$(sed -n "${pos}p" "$path"); __ct_file_handles[${h}_pos]=$((pos + 1)); echo "$__CT_RET"',
    )
    # Appends when the mode is "a", otherwise overwrites. An explicit
    # if/else is used instead of `A && B || C`: with the latter, a failed
    # append would fall through to the overwrite branch and truncate the file.
    write = Method(
        name="write",
        bash_func="__ct_fh_write",
        bash_impl=(
            'local h="$1" data="$2"; '
            'local path="${__ct_file_handles[${h}_path]}"; '
            'local mode="${__ct_file_handles[${h}_mode]}"; '
            'if [[ "$mode" == "a" ]]; then echo -n "$data" >> "$path"; '
            'else echo -n "$data" > "$path"; fi'
        ),
        min_args=1, max_args=1,
    )
    # Same as write, but emits a trailing newline (echo without -n).
    writeln = Method(
        name="writeln",
        bash_func="__ct_fh_writeln",
        bash_impl=(
            'local h="$1" data="$2"; '
            'local path="${__ct_file_handles[${h}_path]}"; '
            'local mode="${__ct_file_handles[${h}_mode]}"; '
            'if [[ "$mode" == "a" ]]; then echo "$data" >> "$path"; '
            'else echo "$data" > "$path"; fi'
        ),
        min_args=1, max_args=1,
    )
    # Drops every bookkeeping entry stored for the handle.
    close = Method(
        name="close",
        bash_func="__ct_fh_close",
        bash_impl='local h="$1"; for k in path fd mode pos; do unset "__ct_file_handles[${h}_$k]"; done',
    )
from .base import Method
class FsMethods:
    """Functions of the ``fs`` namespace (bash only, no AWK forms).

    In every implementation ``$1`` is the path; ``write`` and ``append``
    take the data as ``$2``.
    """

    read = Method(
        name="read",
        bash_func="__ct_fs_read",
        min_args=1,
        max_args=1,
        bash_impl='cat "$1"',
    )
    # Overwrites the file; no trailing newline is added.
    write = Method(
        name="write",
        bash_func="__ct_fs_write",
        min_args=2,
        max_args=2,
        bash_impl='echo -n "$2" > "$1"',
    )
    append = Method(
        name="append",
        bash_func="__ct_fs_append",
        min_args=2,
        max_args=2,
        bash_impl='echo -n "$2" >> "$1"',
    )
    # Echoes the literal string "true" or "false".
    exists = Method(
        name="exists",
        bash_func="__ct_fs_exists",
        min_args=1,
        max_args=1,
        bash_impl='[[ -e "$1" ]] && echo "true" || echo "false"',
    )
    remove = Method(
        name="remove",
        bash_func="__ct_fs_remove",
        min_args=1,
        max_args=1,
        bash_impl='rm -f "$1"',
    )
    mkdir = Method(
        name="mkdir",
        bash_func="__ct_fs_mkdir",
        min_args=1,
        max_args=1,
        bash_impl='mkdir -p "$1"',
    )
    # ls errors are silenced and never produce a failing exit status.
    list = Method(
        name="list",
        bash_func="__ct_fs_list",
        min_args=1,
        max_args=1,
        bash_impl='ls -1 "$1" 2>/dev/null || true',
    )
    # No inline bash body is supplied for open.
    open = Method(
        name="open",
        bash_func="__ct_fs_open",
        min_args=1,
        max_args=2,
        bash_impl=None,
    )
from .base import Method
class HttpMethods:
    """Functions of the ``http`` namespace, implemented with curl.

    All calls run ``curl -sS --fail --show-error`` with ``--max-time`` taken
    from the optional last argument (30 when omitted). ``post`` and ``put``
    send ``$2`` as the body with a JSON content type.
    """

    get = Method(
        name="get",
        bash_func="__ct_http_get",
        min_args=1,
        max_args=2,
        bash_impl='local url="$1"; local timeout="${2:-30}"; curl -sS --fail --show-error --max-time "$timeout" "$url"',
    )
    post = Method(
        name="post",
        bash_func="__ct_http_post",
        min_args=2,
        max_args=3,
        bash_impl='local url="$1"; local data="$2"; local timeout="${3:-30}"; curl -sS --fail --show-error --max-time "$timeout" -X POST -H "Content-Type: application/json" -d "$data" "$url"',
    )
    put = Method(
        name="put",
        bash_func="__ct_http_put",
        min_args=2,
        max_args=3,
        bash_impl='local url="$1"; local data="$2"; local timeout="${3:-30}"; curl -sS --fail --show-error --max-time "$timeout" -X PUT -H "Content-Type: application/json" -d "$data" "$url"',
    )
    delete = Method(
        name="delete",
        bash_func="__ct_http_delete",
        min_args=1,
        max_args=2,
        bash_impl='local url="$1"; local timeout="${2:-30}"; curl -sS --fail --show-error --max-time "$timeout" -X DELETE "$url"',
    )
from .base import Method
class JsonMethods:
    """Functions of the ``json`` namespace.

    ``parse`` and ``stringify`` carry no inline bash body; only ``get`` is
    implemented here, by piping the document (``$1``) through ``jq -r`` with
    the filter given as ``$2``.
    """

    parse = Method(
        name="parse",
        bash_func="__ct_json_parse",
        min_args=1,
        max_args=2,
        bash_impl=None,
    )
    stringify = Method(
        name="stringify",
        bash_func="__ct_json_stringify",
        min_args=1,
        max_args=1,
        bash_impl=None,
    )
    # jq's stderr output is discarded.
    get = Method(
        name="get",
        bash_func="__ct_json_get",
        min_args=2,
        max_args=2,
        bash_impl='echo "$1" | jq -r "$2" 2>/dev/null',
    )
from .base import Method
class LoggerMethods:
    """Functions of the ``logger`` namespace.

    Each one echoes its single argument behind a level tag. ``info`` and
    ``debug`` write to stdout; ``warn`` and ``error`` write to stderr.
    """

    info = Method(
        name="info",
        bash_func="__ct_logger_info",
        min_args=1,
        max_args=1,
        bash_impl='echo "[INFO] $1"',
    )
    warn = Method(
        name="warn",
        bash_func="__ct_logger_warn",
        min_args=1,
        max_args=1,
        bash_impl='echo "[WARN] $1" >&2',
    )
    error = Method(
        name="error",
        bash_func="__ct_logger_error",
        min_args=1,
        max_args=1,
        bash_impl='echo "[ERROR] $1" >&2',
    )
    debug = Method(
        name="debug",
        bash_func="__ct_logger_debug",
        min_args=1,
        max_args=1,
        bash_impl='echo "[DEBUG] $1"',
    )
from .base import Method
class MathMethods:
    """Registry of math helpers.

    The arithmetic entries (``add`` .. ``abs``) are written with Bash
    arithmetic / parameter expansion only. The remaining entries call the
    ``__ct_awk`` helper from Bash and also provide an ``awk_builtin``
    generator that emits the matching AWK builtin call.
    """

    # --- Bash arithmetic: $(( ... )) on the two arguments -----------------
    add = Method(
        name="add",
        bash_func="__ct_math_add",
        bash_impl='echo $(($1 + $2))',
        min_args=2, max_args=2,
    )
    sub = Method(
        name="sub",
        bash_func="__ct_math_sub",
        bash_impl='echo $(($1 - $2))',
        min_args=2, max_args=2,
    )
    mul = Method(
        name="mul",
        bash_func="__ct_math_mul",
        bash_impl='echo $(($1 * $2))',
        min_args=2, max_args=2,
    )
    div = Method(
        name="div",
        bash_func="__ct_math_div",
        bash_impl='echo $(($1 / $2))',
        min_args=2, max_args=2,
    )
    mod = Method(
        name="mod",
        bash_func="__ct_math_mod",
        bash_impl='echo $(($1 % $2))',
        min_args=2, max_args=2,
    )

    # --- Comparisons: echo whichever argument wins the (( )) test ---------
    min = Method(
        name="min",
        bash_func="__ct_math_min",
        bash_impl='(($1 < $2)) && echo $1 || echo $2',
        min_args=2, max_args=2,
    )
    max = Method(
        name="max",
        bash_func="__ct_math_max",
        bash_impl='(($1 > $2)) && echo $1 || echo $2',
        min_args=2, max_args=2,
    )

    # Absolute value by stripping one leading "-" from the argument.
    abs = Method(
        name="abs",
        bash_func="__ct_math_abs",
        bash_impl='local n=$1; echo ${n#-}',
        min_args=1, max_args=1,
    )

    # --- AWK-backed functions ---------------------------------------------
    sin = Method(
        name="sin",
        bash_func="__ct_math_sin",
        bash_impl='__ct_awk "BEGIN{print sin($1)}"',
        awk_builtin=lambda a: f"sin({a[0]})",
        min_args=1, max_args=1,
    )
    cos = Method(
        name="cos",
        bash_func="__ct_math_cos",
        bash_impl='__ct_awk "BEGIN{print cos($1)}"',
        awk_builtin=lambda a: f"cos({a[0]})",
        min_args=1, max_args=1,
    )
    sqrt = Method(
        name="sqrt",
        bash_func="__ct_math_sqrt",
        bash_impl='__ct_awk "BEGIN{print sqrt($1)}"',
        awk_builtin=lambda a: f"sqrt({a[0]})",
        min_args=1, max_args=1,
    )
    log = Method(
        name="log",
        bash_func="__ct_math_log",
        bash_impl='__ct_awk "BEGIN{print log($1)}"',
        awk_builtin=lambda a: f"log({a[0]})",
        min_args=1, max_args=1,
    )
    exp = Method(
        name="exp",
        bash_func="__ct_math_exp",
        bash_impl='__ct_awk "BEGIN{print exp($1)}"',
        awk_builtin=lambda a: f"exp({a[0]})",
        min_args=1, max_args=1,
    )

    # Registered under the name "int"; the Bash side drops everything from
    # the last "." onward, the AWK side calls int().
    int_ = Method(
        name="int",
        bash_func="__ct_math_int",
        bash_impl='echo "${1%.*}"',
        awk_builtin=lambda a: f"int({a[0]})",
        min_args=1, max_args=1,
    )

    # No argument limits are given for rand; the Method defaults apply.
    rand = Method(
        name="rand",
        bash_func="__ct_math_rand",
        bash_impl='__ct_awk "BEGIN{srand(); print rand()}"',
        awk_builtin=lambda a: "rand()",
    )
    atan2 = Method(
        name="atan2",
        bash_func="__ct_math_atan2",
        bash_impl='__ct_awk "BEGIN{print atan2($1, $2)}"',
        awk_builtin=lambda a: f"atan2({a[0]}, {a[1]})",
        min_args=2, max_args=2,
    )
from .base import Method
class RegexMethods:
    """Registry of regex helpers built on Bash's ``[[ string =~ regex ]]``.

    Both entries take exactly two arguments: the subject string and the
    pattern.
    """

    # Prints "true" when the pattern matches, otherwise "false".
    match = Method(
        min_args=2, max_args=2,
        name="match",
        bash_func="__ct_regex_match",
        bash_impl='[[ "$1" =~ $2 ]] && echo true || echo false',
    )

    # Prints the whole matched text (BASH_REMATCH[0]); prints nothing when
    # the pattern does not match.
    extract = Method(
        min_args=2, max_args=2,
        name="extract",
        bash_func="__ct_regex_extract",
        bash_impl='[[ "$1" =~ $2 ]] && echo "${BASH_REMATCH[0]}"',
    )
from .base import Method
class StringMethods:
    """Registry of string methods.

    Each entry pairs a Bash implementation (``bash_impl``, the body of the
    function named by ``bash_func``) with, where one is given, an ``awk_gen``
    callable that receives the receiver expression ``obj`` and the list of
    argument expressions ``args`` and returns an AWK expression.

    In the Bash bodies ``$1`` is the receiver string and ``$2``, ``$3`` are
    the method arguments. Most bodies store their result in ``__CT_RET`` and
    echo it; ``split`` fills ``__CT_RET_ARR`` instead.
    """

    # Length of the string.
    len = Method(
        name="len",
        bash_func="__ct_str_len",
        bash_impl='__CT_RET=${#1}; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"length({obj})",
    )

    # Upper-/lower-case conversion.
    upper = Method(
        name="upper",
        bash_func="__ct_str_upper",
        bash_impl='__CT_RET="${1^^}"; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"toupper({obj})",
    )
    lower = Method(
        name="lower",
        bash_func="__ct_str_lower",
        bash_impl='__CT_RET="${1,,}"; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"tolower({obj})",
    )

    # Strip leading and trailing whitespace.
    # NOTE(review): the Bash side strips [:space:] while the AWK side strips
    # only spaces and tabs, and the AWK gsub() modifies {obj} in place.
    trim = Method(
        name="trim",
        bash_func="__ct_str_trim",
        bash_impl='local s="$1"; s="${s#"${s%%[![:space:]]*}"}"; __CT_RET="${s%"${s##*[![:space:]]}"}" ; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f'(gsub(/^[ \\t]+|[ \\t]+$/, "", {obj}) ? {obj} : {obj})',
    )

    # Substring tests; the Bash bodies yield the strings "true"/"false".
    contains = Method(
        name="contains",
        bash_func="__ct_str_contains",
        bash_impl='[[ "$1" == *"$2"* ]] && __CT_RET=true || __CT_RET=false; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"(index({obj}, {args[0]}) > 0)",
        min_args=1, max_args=1,
    )
    starts = Method(
        name="starts",
        bash_func="__ct_str_starts",
        bash_impl='[[ "$1" == "$2"* ]] && __CT_RET=true || __CT_RET=false; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"(substr({obj}, 1, length({args[0]})) == {args[0]})",
        min_args=1, max_args=1,
    )
    ends = Method(
        name="ends",
        bash_func="__ct_str_ends",
        bash_impl='[[ "$1" == *"$2" ]] && __CT_RET=true || __CT_RET=false; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"(substr({obj}, length({obj}) - length({args[0]}) + 1) == {args[0]})",
        min_args=1, max_args=1,
    )

    # Zero-based position of the first occurrence of the needle, or -1.
    # The Bash body removes the longest suffix starting at the needle and
    # measures what is left. The needle is quoted inside the expansion so
    # that glob characters in it (*, ?, [) are matched literally, in line
    # with the quoted "$2" used by contains/starts/ends.
    index = Method(
        name="index",
        bash_func="__ct_str_index",
        bash_impl='local i="${1%%"$2"*}"; [[ "$i" == "$1" ]] && __CT_RET=-1 || __CT_RET=${#i}; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"(index({obj}, {args[0]}) - 1)",
        min_args=1, max_args=1,
    )

    # Replace every occurrence of args[0] with args[1].
    # NOTE(review): Bash matches the quoted "$2" literally, whereas AWK's
    # gsub() interprets its first argument as a regular expression and
    # modifies {obj} in place.
    replace = Method(
        name="replace",
        bash_func="__ct_str_replace",
        bash_impl='__CT_RET="${1//"$2"/"$3"}"; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"(gsub({args[0]}, {args[1]}, {obj}) ? {obj} : {obj})",
        min_args=2, max_args=2,
    )

    # Substring by zero-based offset and length; the AWK side adds 1 to the
    # offset because AWK's substr() is one-based.
    substr = Method(
        name="substr",
        bash_func="__ct_str_substr",
        bash_impl='__CT_RET="${1:$2:$3}"; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"substr({obj}, {args[0]} + 1, {args[1]})",
        min_args=2, max_args=2,
    )

    # Split on the separator; the Bash body reads the fields into the
    # __CT_RET_ARR array rather than echoing a value.
    split = Method(
        name="split",
        bash_func="__ct_str_split",
        bash_impl='local IFS="$2"; read -ra __CT_RET_ARR <<< "$1"',
        awk_gen=lambda obj, args: f"split({obj}, __split_arr, {args[0]})",
        min_args=1, max_args=1,
        returns_array=True,
    )

    # Single character at a zero-based offset. The Bash body prints the
    # character followed by a literal "X" — presumably a sentinel the caller
    # strips so that whitespace survives command substitution; TODO confirm.
    charAt = Method(
        name="charAt",
        bash_func="__ct_str_char_at",
        bash_impl='__CT_RET="${1:$2:1}"; printf \'%sX\' "$__CT_RET"',
        awk_gen=lambda obj, args: f"substr({obj}, {args[0]} + 1, 1)",
        min_args=1, max_args=1,
    )

    # Percent-encode every character outside [a-zA-Z0-9.~_-]. Bash only.
    urlencode = Method(
        name="urlencode",
        bash_func="__ct_str_urlencode",
        bash_impl='local s="$1" c i len=${#1}; __CT_RET=""; for ((i=0; i<len; i++)); do c="${s:i:1}"; case "$c" in [a-zA-Z0-9.~_-]) __CT_RET+="$c" ;; *) __CT_RET+=$(printf "%%%02X" "\'$c") ;; esac; done; echo "$__CT_RET"',
    )

    # Concatenate the receiver with one argument.
    concat = Method(
        name="concat",
        bash_func="__ct_str_concat",
        bash_impl='__CT_RET="$1$2"; echo "$__CT_RET"',
        awk_gen=lambda obj, args: f"({obj} {args[0]})",
        min_args=1, max_args=1,
    )

    # Numeric code of the first character (printf '%d' with a leading quote).
    # Bash only.
    ord = Method(
        name="ord",
        bash_func="__ct_str_ord",
        bash_impl="__CT_RET=$(printf '%d' \"'$1\"); echo \"$__CT_RET\"",
    )

    # Character for a numeric code, built as a \xHH escape and expanded by
    # printf '%b'. Bash only.
    chr = Method(
        name="chr",
        bash_func="__ct_str_chr",
        bash_impl="printf -v __CT_RET '%b' \"\\\\x$(printf '%02x' \"$1\")\"; echo \"$__CT_RET\"",
    )
from .base import Method
class TimeMethods:
    """Registry of time helpers implemented with the ``date`` command."""

    # Prints the output of `date +%s`.
    now = Method(
        name="now",
        bash_func="__ct_time_now",
        bash_impl='date +%s',
    )

    # Prints the output of `date +%s%3N`.
    ms = Method(
        name="ms",
        bash_func="__ct_time_ms",
        bash_impl='date +%s%3N',
    )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment