Commit 890d457c authored by Roman Alifanov's avatar Roman Alifanov

Add estrlist example with benchmarks, fix time.ms/math.* codegen

- Add estrlist.ct: string list operations with @awk acceleration - Add estrlist_noawk.ct: pure bash version for comparison - Add benchmark.sh: performance comparison script - Add BENCHMARK.md: results showing @awk gives 93x speedup on union Codegen fixes: - Fix time.now() and time.ms() calls in dispatch_codegen - Fix math.* namespace calls - Previous fixes for class field assignment and nested constructors
parent 4115df74
......@@ -13,6 +13,19 @@ class ClassMixin:
self.class_field_types[(cls.name, field_name)] = "array"
elif isinstance(default_value, DictLiteral):
self.class_field_types[(cls.name, field_name)] = "dict"
elif isinstance(default_value, NilLiteral):
# nil typically means object reference will be assigned later
self.class_field_types[(cls.name, field_name)] = "object"
elif isinstance(default_value, NewExpr):
# new SomeClass() is an object
self.class_field_types[(cls.name, field_name)] = "object"
elif isinstance(default_value, CallExpr) and isinstance(default_value.callee, Identifier):
# SomeClass() constructor call is an object
callee_name = default_value.callee.name
if callee_name in self.classes:
self.class_field_types[(cls.name, field_name)] = "object"
else:
self.class_field_types[(cls.name, field_name)] = "scalar"
else:
self.class_field_types[(cls.name, field_name)] = "scalar"
......@@ -47,25 +60,26 @@ class ClassMixin:
"""Generate class factory function."""
self.emit(f"{cls.name} () {{")
self.indent_level += 1
self.emit('__ct_last_instance="__ct_inst_$RANDOM$RANDOM"')
self.emit(f'__ct_obj_class["$__ct_last_instance"]="{cls.name}"')
# Save instance immediately as nested constructors may overwrite __ct_last_instance
self.emit('local __ct_this_instance="__ct_inst_$RANDOM$RANDOM"')
self.emit('__ct_obj_class["$__ct_this_instance"]="{}"'.format(cls.name))
for field_name, default_value in cls.fields:
if isinstance(default_value, ArrayLiteral):
elements = [self.generate_expr(e) for e in default_value.elements]
if elements:
arr_content = " ".join([f'"{e}"' for e in elements])
self.emit(f'declare -ga "${{__ct_last_instance}}_{field_name}=({arr_content})"')
self.emit(f'declare -ga "${{__ct_this_instance}}_{field_name}=({arr_content})"')
else:
self.emit(f'declare -ga "${{__ct_last_instance}}_{field_name}=()"')
self.emit(f'declare -ga "${{__ct_this_instance}}_{field_name}=()"')
elif isinstance(default_value, DictLiteral):
self.emit(f'declare -gA "${{__ct_last_instance}}_{field_name}=()"')
self.emit(f'__CT_OBJ["$__ct_last_instance.{field_name}"]="${{__ct_last_instance}}_{field_name}"')
self.emit(f'declare -gA "${{__ct_this_instance}}_{field_name}=()"')
self.emit(f'__CT_OBJ["$__ct_this_instance.{field_name}"]="${{__ct_this_instance}}_{field_name}"')
elif default_value:
val = self.generate_expr(default_value)
self.emit(f'__CT_OBJ["$__ct_last_instance.{field_name}"]="{val}"')
self.emit(f'__CT_OBJ["$__ct_this_instance.{field_name}"]="{val}"')
else:
self.emit(f'__CT_OBJ["$__ct_last_instance.{field_name}"]=""')
self.emit(f'__CT_OBJ["$__ct_this_instance.{field_name}"]=""')
if cls.parent:
self.emit(f'# Inherit from {cls.parent}')
......@@ -73,7 +87,10 @@ class ClassMixin:
if cls.constructor:
self.emit("# Call constructor")
params_list = " ".join([f'"${{{i + 1}}}"' for i in range(len(cls.constructor.params))])
self.emit(f'__ct_class_{cls.name}_construct "$__ct_last_instance" {params_list}')
self.emit(f'__ct_class_{cls.name}_construct "$__ct_this_instance" {params_list}')
# Restore __ct_last_instance to this instance (after nested constructors may have changed it)
self.emit('__ct_last_instance="$__ct_this_instance"')
self.indent_level -= 1
self.emit("}")
......
......@@ -43,6 +43,7 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin,
self.array_vars: Set[str] = set()
self.dict_vars: Set[str] = set()
self.object_vars: Set[str] = set() # Variables holding object instances
self.class_field_types: Dict[tuple, str] = {}
self.local_vars: Set[str] = set()
......
......@@ -47,6 +47,7 @@ class DispatchMixin:
args_str = " ".join([f'"{a}"' for a in args])
self.emit(f'{stmt.value.class_name} {args_str}')
self.emit_var_assign(target, '$__ct_last_instance')
self.object_vars.add(target) # Track object variable
return
if isinstance(stmt.value, CallExpr) and isinstance(stmt.value.callee, Identifier):
......@@ -56,6 +57,7 @@ class DispatchMixin:
args_str = " ".join([f'"{a}"' for a in args])
self.emit(f'{callee_name} {args_str}')
self.emit_var_assign(target, '$__ct_last_instance')
self.object_vars.add(target) # Track object variable
return
if isinstance(stmt.value, CallExpr) and isinstance(stmt.value.callee, MemberAccess):
......@@ -109,6 +111,27 @@ class DispatchMixin:
def _generate_this_field_assignment(self, stmt: Assignment):
"""Generate this.field = value assignment."""
field = stmt.target.member
# Handle class instantiation: this.field = SomeClass() or this.field = new SomeClass()
# Important: save instance immediately as nested constructors may overwrite __ct_last_instance
if isinstance(stmt.value, NewExpr):
args = [self.generate_expr(arg) for arg in stmt.value.arguments]
args_str = " ".join([f'"{a}"' for a in args])
self.emit(f'{stmt.value.class_name} {args_str}')
self.emit(f'local __ct_tmp_{field}="$__ct_last_instance"')
self.emit(f'__CT_OBJ["$this.{field}"]="$__ct_tmp_{field}"')
return
if isinstance(stmt.value, CallExpr) and isinstance(stmt.value.callee, Identifier):
callee_name = stmt.value.callee.name
if callee_name in self.classes:
args = [self.generate_expr(arg) for arg in stmt.value.arguments]
args_str = " ".join([f'"{a}"' for a in args])
self.emit(f'{callee_name} {args_str}')
self.emit(f'local __ct_tmp_{field}="$__ct_last_instance"')
self.emit(f'__CT_OBJ["$this.{field}"]="$__ct_tmp_{field}"')
return
value = self.generate_expr(stmt.value)
if stmt.operator == "=":
self.emit(f'__CT_OBJ["$this.{field}"]="{value}"')
......@@ -137,6 +160,34 @@ class DispatchMixin:
self.emit_var_assign(target, '$__CT_RET')
return True
# Handle this.field.method() - field holds an object reference
if isinstance(callee.object, MemberAccess) and isinstance(callee.object.object, ThisExpr):
field_name = callee.object.member
method = callee.member
args_str = " ".join([f'"{a}"' for a in args])
# Check field type
field_type = self.class_field_types.get((self.current_class, field_name)) if self.current_class else None
if field_type == "array" and method in ARR_METHODS:
arr_name = f'"${{this}}_{field_name}"'
self.emit(f'{ARR_METHODS[method]} {arr_name} {args_str} >/dev/null'.strip())
self.emit_var_assign(target, '$__CT_RET')
return True
if field_type == "dict" and method in DICT_METHODS:
dict_ref = f'"${{__CT_OBJ[\\"$this.{field_name}\\"]}}"'
self.emit(f'{DICT_METHODS[method]} {dict_ref} {args_str} >/dev/null'.strip())
self.emit_var_assign(target, '$__CT_RET')
return True
if field_type == "object":
# Object field - use dynamic dispatch
obj_ref = f'${{__CT_OBJ["$this.{field_name}"]}}'
self.emit(f'__ct_call_method "{obj_ref}" "{method}" {args_str} >/dev/null')
self.emit_var_assign(target, '$__CT_RET')
return True
# Scalar field - fall through to default handling
return False
if isinstance(callee.object, Identifier):
obj_name = callee.object.name
method = callee.member
......@@ -315,13 +366,16 @@ class DispatchMixin:
return
call_code = self.generate_call_statement(expr)
# Add >/dev/null to suppress return value echo for regular method calls
# But NOT for object method calls (this.method(), this.field.method(), obj.method())
# because they may print output that needs to pass through
if isinstance(expr.callee, MemberAccess):
obj = expr.callee.object
if isinstance(obj, Identifier):
if obj.name not in BUILTIN_NAMESPACES:
# Don't suppress if it's an object variable or in BUILTIN_NAMESPACES
if obj.name not in BUILTIN_NAMESPACES and obj.name not in self.object_vars:
call_code = f'{call_code} >/dev/null'
elif isinstance(obj, ThisExpr):
call_code = f'{call_code} >/dev/null'
# Don't add >/dev/null for ThisExpr or MemberAccess (this.field.method())
self.emit(call_code)
else:
result = self.generate_expr(expr)
......@@ -520,6 +574,13 @@ class DispatchMixin:
return f'$({args[0]})' if args else ""
elif method == "source":
return f'source "{args[0]}"' if args else ""
elif obj_name == "time":
if method == "now":
return '__ct_time_now'
elif method == "ms":
return '__ct_time_ms'
elif obj_name == "math":
return f'__ct_math_{method} {args_str}'
return None
def _generate_this_field_call(self, callee: MemberAccess, args_str: str) -> str:
......@@ -527,14 +588,23 @@ class DispatchMixin:
field_name = callee.object.member
method = callee.member
if method in ARR_METHODS:
# Check field type if known
field_type = self.class_field_types.get((self.current_class, field_name)) if self.current_class else None
if field_type == "array" and method in ARR_METHODS:
arr_name = f'"${{this}}_{field_name}"'
return f'{ARR_METHODS[method]} {arr_name} {args_str}'.strip()
if method in DICT_METHODS:
if field_type == "dict" and method in DICT_METHODS:
dict_ref = f'"${{__CT_OBJ[\\"$this.{field_name}\\"]}}"'
return f'{DICT_METHODS[method]} {dict_ref} {args_str}'.strip()
if field_type == "object":
# Object field - use dynamic dispatch
obj_ref = f'${{__CT_OBJ["$this.{field_name}"]}}'
return f'__ct_call_method "{obj_ref}" "{method}" {args_str}'
# Scalar/unknown field - return None to fall through to default handling
return None
def _generate_var_field_call(self, callee: MemberAccess, args_str: str) -> str:
......@@ -551,6 +621,10 @@ class DispatchMixin:
self.class_field_types.get((cls, field_name)) == "dict"
for cls in self.classes
)
is_object_field = any(
self.class_field_types.get((cls, field_name)) == "object"
for cls in self.classes
)
if is_array_field and method in ARR_METHODS:
return f'{ARR_METHODS[method]} "${{{var_name}}}_{field_name}" {args_str}'.strip()
......@@ -558,4 +632,10 @@ class DispatchMixin:
if is_dict_field and method in DICT_METHODS:
return f'{DICT_METHODS[method]} "${{{var_name}}}_{field_name}" {args_str}'.strip()
if is_object_field:
# Object field - use dynamic dispatch
obj_ref = f'${{__CT_OBJ["${{{var_name}}}.{field_name}"]}}'
return f'__ct_call_method "{obj_ref}" "{method}" {args_str}'
# Scalar/unknown field - return None to fall through to default handling
return None
# Benchmark: estrlist implementations
Сравнение 4 реализаций estrlist:
- `/bin/estrlist` — ELF бинарник (C)
- `/bin/estrlist.sh` — оригинальный shell-скрипт
- `estrlist_awk.sh` — ContenT с @awk декораторами
- `estrlist_noawk.sh` — ContenT без @awk (чистый bash)
## Результаты
**Тестовые данные:** small=500 слов, medium=2000 слов, large=10000 слов
**Итерации:** 100 (50 для large)
### TEST 1: strip (500 words, 100 iterations)
| Реализация | Общее время | Среднее |
|------------|-------------|---------|
| ELF binary | 117ms | 1ms |
| Shell original | 280ms | 2ms |
| ContenT @awk | 1036ms | 10ms |
| ContenT noawk | 1429ms | 14ms |
### TEST 2: count (2000 words, 100 iterations)
| Реализация | Общее время | Среднее |
|------------|-------------|---------|
| ELF binary | 193ms | 1ms |
| Shell original | 440ms | 4ms |
| ContenT @awk | 1372ms | 13ms |
| ContenT noawk | 2922ms | 29ms |
### TEST 3: union (2000 words with duplicates, 100 iterations)
| Реализация | Общее время | Среднее |
|------------|-------------|---------|
| ELF binary | 171ms | 1ms |
| Shell original | 1378ms | 13ms |
| ContenT @awk | 1257ms | 12ms |
| **ContenT noawk** | **111875ms** | **1118ms** |
### TEST 4: first (2000 words, 100 iterations)
| Реализация | Общее время | Среднее |
|------------|-------------|---------|
| ELF binary | 189ms | 1ms |
| Shell original | 429ms | 4ms |
| ContenT @awk | 1279ms | 12ms |
| ContenT noawk | 2361ms | 23ms |
### TEST 5: list (500 words, 100 iterations)
| Реализация | Общее время | Среднее |
|------------|-------------|---------|
| ELF binary | 159ms | 1ms |
| Shell original | 360ms | 3ms |
| ContenT @awk | 891ms | 8ms |
| ContenT noawk | 1749ms | 17ms |
### TEST 6: strip (10000 words, 50 iterations)
| Реализация | Общее время | Среднее |
|------------|-------------|---------|
| ELF binary | 277ms | 5ms |
| Shell original | 688ms | 13ms |
| ContenT @awk | 3249ms | 64ms |
| ContenT noawk | 3993ms | 79ms |
## Выводы
### Общая производительность (среднее время)
| Операция | ELF | Shell | @awk | noawk | @awk vs noawk |
|----------|-----|-------|------|-------|---------------|
| strip 500w | 1ms | 2ms | 10ms | 14ms | 1.4x |
| count 2000w | 1ms | 4ms | 13ms | 29ms | 2.2x |
| **union 2000w** | 1ms | 13ms | **12ms** | **1118ms** | **93x** |
| first 2000w | 1ms | 4ms | 12ms | 23ms | 1.9x |
| list 500w | 1ms | 3ms | 8ms | 17ms | 2.1x |
| strip 10000w | 5ms | 13ms | 64ms | 79ms | 1.2x |
### Ключевые наблюдения
1. **ELF binary** — эталон производительности (~1-5ms)
2. **Shell original** — 2-13x медленнее ELF
3. **ContenT @awk** — сопоставим с Shell original на сложных операциях (union)
4. **ContenT noawk** — **катастрофически медленный на union** (1118ms vs 12ms)
### Критическая разница: union
```
@awk: 12ms — AWK делает дедупликацию + сортировку за один проход
noawk: 1118ms — bash foreach + dict + вызов sort в subshell
```
**@awk даёт 93x ускорение** на операции union с 2000 словами.
### Масштабирование на больших данных (10000 слов)
На strip с 10000 слов разница между @awk и noawk минимальна (64ms vs 79ms),
потому что основное время занимает передача данных и инициализация.
## Запуск бенчмарка
```bash
cd examples/estrlist
./benchmark.sh
```
# benchmark.ct - Benchmark estrlist implementations
# Compares: /bin/estrlist (ELF), /bin/estrlist.sh, ./estrlist_awk.sh (ContenT with @awk), ./estrlist_noawk.sh (ContenT without @awk)
# Build a space-separated word list for the benchmarks.
# Appends one "word{i}" entry for every i yielded by range (0, size), so
# every entry is distinct, then joins the entries with single spaces.
func generate_test_data (size) {
words = []
for i in range (0, size) {
words.push ("word{i}")
}
return words.join (" ")
}
# Build a space-separated word list that is dominated by duplicates.
# Each entry is "item{i % 10}", so at most ten different words appear no
# matter how large `size` is. Used as input for the union test.
func generate_duplicate_data (size) {
words = []
for i in range (0, size) {
# Only the remainder is used in the word, which forces repeats.
mod = i % 10
words.push ("item{mod}")
}
return words.join (" ")
}
# Time `iterations` runs of the shell command `cmd`.
# name       - label printed in front of the result line
# cmd        - command string handed to shell.capture on every iteration
# iterations - number of runs
# Prints "<name>: <total>ms total, <avg>ms avg (<iterations> iters)" and
# returns the total elapsed value (difference of two time.ms () readings).
# NOTE(review): iterations == 0 would divide by zero below; the only caller
# in this file passes 10.
func benchmark (name, cmd, iterations) {
start_ms = time.ms ()
for i in range (0, iterations) {
# The captured output is discarded; only the elapsed time matters.
shell.capture (cmd)
}
end_ms = time.ms ()
elapsed = end_ms - start_ms
avg = elapsed / iterations
print ("{name}: {elapsed}ms total, {avg}ms avg ({iterations} iters)")
return elapsed
}
# Drive the whole benchmark: generate input files under /tmp, time five
# estrlist operations against four implementations, then delete the files.
# The commands read their input with $(cat <file>) so the word lists do not
# have to be embedded in the command strings.
func run_benchmarks () {
print ("==========================================")
print ("ESTRLIST BENCHMARK")
print ("==========================================")
print ("")
# Workload sizes (in words) and the repetition count used for every test.
small = 50
medium = 200
iterations = 10
print ("Generating test data...")
small_data = generate_test_data (small)
medium_data = generate_test_data (medium)
dup_data = generate_duplicate_data (medium)
# Persist the inputs so each benchmarked command can read them back.
fs.write ("/tmp/bench_small.txt", small_data)
fs.write ("/tmp/bench_medium.txt", medium_data)
fs.write ("/tmp/bench_dup.txt", dup_data)
print ("Test data: small={small} words, medium={medium} words")
print ("")
# TEST 1: strip
print ("--- TEST 1: strip ({small} words) ---")
benchmark ("ELF binary ", "/bin/estrlist strip \"$(cat /tmp/bench_small.txt)\"", iterations)
benchmark ("Shell original", "/bin/estrlist.sh strip \"$(cat /tmp/bench_small.txt)\"", iterations)
benchmark ("ContenT @awk ", "./estrlist_awk.sh strip \"$(cat /tmp/bench_small.txt)\"", iterations)
benchmark ("ContenT noawk ", "./estrlist_noawk.sh strip \"$(cat /tmp/bench_small.txt)\"", iterations)
print ("")
# TEST 2: count
print ("--- TEST 2: count ({medium} words) ---")
benchmark ("ELF binary ", "/bin/estrlist count \"$(cat /tmp/bench_medium.txt)\"", iterations)
benchmark ("Shell original", "/bin/estrlist.sh count \"$(cat /tmp/bench_medium.txt)\"", iterations)
benchmark ("ContenT @awk ", "./estrlist_awk.sh count \"$(cat /tmp/bench_medium.txt)\"", iterations)
benchmark ("ContenT noawk ", "./estrlist_noawk.sh count \"$(cat /tmp/bench_medium.txt)\"", iterations)
print ("")
# TEST 3: union
print ("--- TEST 3: union ({medium} words with dups) ---")
benchmark ("ELF binary ", "/bin/estrlist union \"$(cat /tmp/bench_dup.txt)\"", iterations)
benchmark ("Shell original", "/bin/estrlist.sh union \"$(cat /tmp/bench_dup.txt)\"", iterations)
benchmark ("ContenT @awk ", "./estrlist_awk.sh union \"$(cat /tmp/bench_dup.txt)\"", iterations)
benchmark ("ContenT noawk ", "./estrlist_noawk.sh union \"$(cat /tmp/bench_dup.txt)\"", iterations)
print ("")
# TEST 4: first
print ("--- TEST 4: first ({medium} words) ---")
benchmark ("ELF binary ", "/bin/estrlist first \"$(cat /tmp/bench_medium.txt)\"", iterations)
benchmark ("Shell original", "/bin/estrlist.sh first \"$(cat /tmp/bench_medium.txt)\"", iterations)
benchmark ("ContenT @awk ", "./estrlist_awk.sh first \"$(cat /tmp/bench_medium.txt)\"", iterations)
benchmark ("ContenT noawk ", "./estrlist_noawk.sh first \"$(cat /tmp/bench_medium.txt)\"", iterations)
print ("")
# TEST 5: list
# The list command prints one word per line, so its output is redirected
# to /dev/null inside the command itself.
print ("--- TEST 5: list ({small} words) ---")
benchmark ("ELF binary ", "/bin/estrlist list \"$(cat /tmp/bench_small.txt)\" >/dev/null", iterations)
benchmark ("Shell original", "/bin/estrlist.sh list \"$(cat /tmp/bench_small.txt)\" >/dev/null", iterations)
benchmark ("ContenT @awk ", "./estrlist_awk.sh list \"$(cat /tmp/bench_small.txt)\" >/dev/null", iterations)
benchmark ("ContenT noawk ", "./estrlist_noawk.sh list \"$(cat /tmp/bench_small.txt)\" >/dev/null", iterations)
print ("")
# Cleanup
fs.remove ("/tmp/bench_small.txt")
fs.remove ("/tmp/bench_medium.txt")
fs.remove ("/tmp/bench_dup.txt")
print ("==========================================")
print ("BENCHMARK COMPLETE")
print ("==========================================")
}
# Script entry point: run the suite as soon as the file is executed.
run_benchmarks ()
#!/bin/bash
# Benchmark driver for the estrlist implementations.
# Targets: /bin/estrlist (ELF), /bin/estrlist.sh, estrlist_awk.sh, estrlist_noawk.sh
set -euo pipefail

# Work from the script's own directory so the ./estrlist_*.sh paths resolve.
cd "$(dirname "$0")"

printf '%s\n' \
    "==========================================" \
    "ESTRLIST BENCHMARK" \
    "==========================================" \
    ""

# Workload sizes (in words) and the default repetition count.
SMALL=500
MEDIUM=2000
ITERATIONS=100

echo "Generating test data..."

# SMALL distinct words; every entry carries a leading space.
small_data=""
for ((i = 0; i < SMALL; i++)); do
    small_data+=" word$i"
done

# MEDIUM distinct words, same layout.
medium_data=""
for ((i = 0; i < MEDIUM; i++)); do
    medium_data+=" word$i"
done

# MEDIUM words drawn from only ten values, used by the union test.
dup_data=""
for ((i = 0; i < MEDIUM; i++)); do
    mod=$((i % 10))
    dup_data+=" item$mod"
done

echo "Test data: small=$SMALL words, medium=$MEDIUM words"
echo ""
# benchmark NAME CMD ITERS
#   Evaluate the command string CMD ITERS times and print the total and the
#   average wall-clock time in milliseconds.
#   NAME  - label printed in the first column
#   CMD   - shell command string, run through eval with all output discarded
#   ITERS - positive number of repetitions
#   Returns 1 (with a message on stderr) when ITERS is not positive.
#   Timestamps come from `date +%s%3N`; %N is a GNU date extension.
benchmark() {
    local name="$1"
    local cmd="$2"
    local iters="$3"
    # Declared separately from their assignments so that a failing `date`
    # is not hidden behind the exit status of `local`.
    local start_ms end_ms elapsed avg
    # Keep the loop counter out of the caller's scope.
    local i

    # Fail with a readable message instead of a division-by-zero error.
    if ((iters <= 0)); then
        echo "benchmark: iteration count must be positive (got '$iters')" >&2
        return 1
    fi

    start_ms=$(date +%s%3N)
    for ((i = 0; i < iters; i++)); do
        # Best effort: a failing command must not abort the run under set -e.
        eval "$cmd" >/dev/null 2>&1 || true
    done
    end_ms=$(date +%s%3N)

    elapsed=$((end_ms - start_ms))
    avg=$((elapsed / iters))
    printf "%-18s: %4dms total, %3dms avg (%d iters)\n" "$name" "$elapsed" "$avg" "$iters"
}
# Time one estrlist operation against all four implementations.
#   $1 - operation name
#   $2 - word list, embedded in the command as one single-quoted argument
#   $3 - iteration count
run_all() {
    local op="$1" data="$2" iters="$3"
    benchmark "ELF binary" "/bin/estrlist $op '$data'" "$iters"
    benchmark "Shell original" "/bin/estrlist.sh $op '$data'" "$iters"
    benchmark "ContenT @awk" "./estrlist_awk.sh $op '$data'" "$iters"
    benchmark "ContenT noawk" "./estrlist_noawk.sh $op '$data'" "$iters"
    echo ""
}

# TEST 1: strip
echo "--- TEST 1: strip ($SMALL words) ---"
run_all strip "$small_data" "$ITERATIONS"

# TEST 2: count
echo "--- TEST 2: count ($MEDIUM words) ---"
run_all count "$medium_data" "$ITERATIONS"

# TEST 3: union
echo "--- TEST 3: union ($MEDIUM words with dups) ---"
run_all union "$dup_data" "$ITERATIONS"

# TEST 4: first
echo "--- TEST 4: first ($MEDIUM words) ---"
run_all first "$medium_data" "$ITERATIONS"

# TEST 5: list
echo "--- TEST 5: list ($SMALL words, output suppressed) ---"
run_all list "$small_data" "$ITERATIONS"

# TEST 6: Large data strip
LARGE=10000
large_data=""
for ((i = 0; i < LARGE; i++)); do
    large_data+=" word$i"
done
echo "--- TEST 6: strip ($LARGE words, 50 iters) ---"
run_all strip "$large_data" 50

echo "=========================================="
echo "BENCHMARK COMPLETE"
echo "=========================================="
# estrlist.ct - String list operations utility
# Uses: cli.ct, classes, @awk, when, lambdas, foreach
# ============================================
# FastOps - AWK-accelerated operations (about 1.2x-2.2x faster than the pure-bash build per BENCHMARK.md)
# ============================================
# Word-list helpers whose methods are all marked @awk.
# Every method treats its input as a space-separated list. In each method the
# value returned by text.split (" ") is used as the piece count and the
# pieces are read from __split_arr[1..n]; empty pieces (from repeated
# spaces) are skipped.
class FastOps {
# Return the words of `text` joined by single spaces (extra spaces removed).
@awk
func strip (text) {
result = ""
n = text.split (" ")
for i in range (1, n + 1) {
word = __split_arr[i]
if word != "" {
# First word goes in as-is; later ones are appended after a space.
if result == "" {
result = word
} else {
result = result .. " " .. word
}
}
}
return result
}
# Return the number of non-empty words in `text`.
@awk
func count (text) {
c = 0
n = text.split (" ")
for i in range (1, n + 1) {
if __split_arr[i] != "" {
c += 1
}
}
return c
}
# Return the first non-empty word of `text`, or "" when there is none.
@awk
func first (text) {
n = text.split (" ")
for i in range (1, n + 1) {
word = __split_arr[i]
if word != "" {
return word
}
}
return ""
}
# Return the last non-empty word of `text`, or "" when there is none.
@awk
func last (text) {
result = ""
n = text.split (" ")
for i in range (1, n + 1) {
word = __split_arr[i]
if word != "" {
# Overwritten on every hit, so the final value is the last word.
result = word
}
}
return result
}
# Print every non-empty word of `text` via print; always returns "".
@awk
func list (text) {
n = text.split (" ")
for i in range (1, n + 1) {
word = __split_arr[i]
if word != "" {
print (word)
}
}
return ""
}
# Return 1 when some piece of `text` equals `needle`, otherwise 0.
@awk
func has_word (needle, text) {
n = text.split (" ")
for i in range (1, n + 1) {
if __split_arr[i] == needle {
return 1
}
}
return 0
}
# Return the words of `text` with repeats removed, keeping the order of
# first appearance (no sorting happens here).
@awk
func union (text) {
result = ""
n = text.split (" ")
for i in range (1, n + 1) {
word = __split_arr[i]
if word != "" {
# seen[] marks words that were already emitted.
if seen[word] != 1 {
seen[word] = 1
if result == "" {
result = word
} else {
result = result .. " " .. word
}
}
}
}
return result
}
# Return the words of `list2` that do not occur in `list1`.
@awk
func exclude (list1, list2) {
# Pass 1: record every word of list1 in excl[].
n1 = list1.split (" ")
for i in range (1, n1 + 1) {
w = __split_arr[i]
if w != "" {
excl[w] = 1
}
}
# Pass 2: keep the list2 words that were not recorded.
result = ""
n2 = list2.split (" ")
for i in range (1, n2 + 1) {
w = __split_arr[i]
if w != "" {
if excl[w] != 1 {
if result == "" {
result = w
} else {
result = result .. " " .. w
}
}
}
}
return result
}
# Return the words of `list2` that also occur in `list1`, in list2 order.
@awk
func intersection (list1, list2) {
# Pass 1: record every word of list1 in lookup[].
n1 = list1.split (" ")
for i in range (1, n1 + 1) {
w = __split_arr[i]
if w != "" {
lookup[w] = 1
}
}
# Pass 2: keep the list2 words that were recorded.
result = ""
n2 = list2.split (" ")
for i in range (1, n2 + 1) {
w = __split_arr[i]
if w != "" {
if lookup[w] == 1 {
if result == "" {
result = w
} else {
result = result .. " " .. w
}
}
}
}
return result
}
# Print every non-empty word of `text` after calling lower () on it;
# always returns "".
@awk
func tolower_list (text) {
n = text.split (" ")
for i in range (1, n + 1) {
w = __split_arr[i]
if w != "" {
print (w.lower ())
}
}
return ""
}
# Print every non-empty word of `text` with upper () applied to its first
# character; always returns "".
# NOTE(review): the tail is taken with substr (1, 1000), so a hard-coded
# length of 1000 is requested — confirm behavior for longer words.
@awk
func firstupper_list (text) {
n = text.split (" ")
for i in range (1, n + 1) {
w = __split_arr[i]
if w != "" {
first_char = w.substr (0, 1)
rest = w.substr (1, 1000)
print (first_char.upper () .. rest)
}
}
return ""
}
}
# ============================================
# SetOps - Set operations with sorting
# ============================================
# Set-style operations on space-separated word lists.
# union is implemented directly with @awk; the other methods delegate to a
# FastOps instance stored in the `fast` field.
class SetOps {
# Holds the FastOps helper; assigned in the constructor.
fast = nil
construct () {
this.fast = FastOps ()
}
# Return the distinct words of `text`, sorted ascending with the `>`
# comparison and joined by single spaces.
@awk
func union (text) {
result = ""
word_count = 0
n = text.split (" ")
# Step 1: collect each distinct non-empty word into words[0..word_count-1].
for i in range (1, n + 1) {
word = __split_arr[i]
if word != "" {
if seen[word] != 1 {
seen[word] = 1
words[word_count] = word
word_count += 1
}
}
}
# Simple insertion sort for strings
for i in range (1, word_count) {
j = i
# Swap the element leftwards until its left neighbour is not greater.
while j > 0 {
if words[j - 1] > words[j] {
tmp = words[j - 1]
words[j - 1] = words[j]
words[j] = tmp
j -= 1
} else {
break
}
}
}
# Step 3: join the sorted words with single spaces.
for i in range (0, word_count) {
if result == "" {
result = words[i]
} else {
result = result .. " " .. words[i]
}
}
return result
}
# Return the words of `list2` that are not in `list1`.
# With an empty `list1` nothing can be excluded, so list2 is only stripped.
func exclude (list1, list2) {
if list1 == "" {
return this.fast.strip (list2)
}
return this.fast.exclude (list1, list2)
}
# Return the words of `list2` that also occur in `list1`; "" when either
# list is empty.
func intersection (list1, list2) {
if list1 == "" || list2 == "" {
return ""
}
return this.fast.intersection (list1, list2)
}
# Return the symmetric difference: words only in `list1` followed by words
# only in `list2`, separated by one space when both parts are non-empty.
func difference (list1, list2) {
# exclude (a, b) yields "b without a", hence the swapped arguments.
only1 = this.exclude (list2, list1)
only2 = this.exclude (list1, list2)
when only1 {
"" {
return only2
}
else {
if only2 == "" {
return only1
}
return "{only1} {only2}"
}
}
}
}
# ============================================
# RegexOps - Regex-based operations
# ============================================
# Regex-based filters over space-separated word lists.
# `patterns` is itself a space-separated list of regular expressions; empty
# pieces of either list are ignored. Every method returns the surviving
# words joined by single spaces.
class RegexOps {
    # Return the words of `text` that match none of `patterns`, where each
    # pattern is anchored on both sides ("^{pat}$").
    func exclude (patterns, text) {
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                matched = false
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        full_pat = "^{pat}$"
                        if regex.match (word, full_pat) {
                            matched = true
                            # One hit is enough to drop the word; skip the
                            # remaining patterns.
                            break
                        }
                    }
                }
                if matched == false {
                    result.push (word)
                }
            }
        }
        return result.join (" ")
    }

    # Return the words of `text` that match at least one of `patterns`,
    # where each pattern is anchored on both sides ("^{pat}$").
    func include (patterns, text) {
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        full_pat = "^{pat}$"
                        if regex.match (word, full_pat) {
                            result.push (word)
                            # Stop at the first hit so the word is added once.
                            break
                        }
                    }
                }
            }
        }
        return result.join (" ")
    }

    # Return the words of `text` that match none of `patterns`; the patterns
    # are passed to regex.match without added anchors.
    func word_exclude (patterns, text) {
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                matched = false
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        if regex.match (word, pat) {
                            matched = true
                            # One hit is enough to drop the word; skip the
                            # remaining patterns.
                            break
                        }
                    }
                }
                if matched == false {
                    result.push (word)
                }
            }
        }
        return result.join (" ")
    }

    # Return the words of `text` that match at least one of `patterns`,
    # where each pattern is anchored at the start only ("^{pat}").
    # NOTE(review): word_exclude uses unanchored patterns while this method
    # anchors at the start — confirm the asymmetry is intended.
    func word_include (patterns, text) {
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        if regex.match (word, "^{pat}") {
                            result.push (word)
                            # Stop at the first hit so the word is added once.
                            break
                        }
                    }
                }
            }
        }
        return result.join (" ")
    }
}
# ============================================
# Estrlist - Main application
# ============================================
# Command dispatcher of the estrlist utility.
# Owns one helper object per operation family and maps a command name to
# the matching helper call.
class Estrlist {
# Helper objects; all three are assigned in the constructor.
fast = nil
sets = nil
regex_ops = nil
construct () {
this.fast = FastOps ()
this.sets = SetOps ()
this.regex_ops = RegexOps ()
}
# Execute the command `cmd` with up to two list arguments.
# Commands either print their result, print words line by line, or report
# a yes/no answer through exit (0) / exit (1). An unknown command prints a
# message and exits with 1.
func run (cmd, arg1, arg2) {
when cmd {
"strip", "strip_spaces" {
print (this.fast.strip (arg1))
}
"list" {
# list prints the words itself, so its return value is not printed.
this.fast.list (arg1)
}
"count" {
print (this.fast.count (arg1))
}
"first" {
r = this.fast.first (arg1)
# Print nothing at all for an empty list.
if r != "" {
print (r)
}
}
"last" {
r = this.fast.last (arg1)
if r != "" {
print (r)
}
}
"union", "uniq" {
print (this.sets.union (arg1))
}
"exclude" {
print (this.sets.exclude (arg1, arg2))
}
"intersection" {
print (this.sets.intersection (arg1, arg2))
}
"difference" {
print (this.sets.difference (arg1, arg2))
}
"isempty", "is_empty" {
# Exit status 0 means the list has no words.
stripped = this.fast.strip (arg1)
if stripped == "" {
exit (0)
}
exit (1)
}
"has_space" {
# Exit status 0 means the list has more than one word.
c = this.fast.count (arg1)
if c > 1 {
exit (0)
}
exit (1)
}
# NOTE(review): "has" and "match" run exactly the same code; both call
# regex.match with arg2 as the subject and arg1 as the pattern — confirm
# whether they are meant to differ.
"has" {
if regex.match (arg2, arg1) {
exit (0)
}
exit (1)
}
"match" {
if regex.match (arg2, arg1) {
exit (0)
}
exit (1)
}
"contains" {
# Exit status 0 means arg1 is one of the words of arg2.
if this.fast.has_word (arg1, arg2) == 1 {
exit (0)
}
exit (1)
}
"reg_exclude" {
print (this.regex_ops.exclude (arg1, arg2))
}
"reg_include" {
print (this.regex_ops.include (arg1, arg2))
}
"reg_wordexclude" {
print (this.regex_ops.word_exclude (arg1, arg2))
}
"reg_wordinclude" {
print (this.regex_ops.word_include (arg1, arg2))
}
"firstupper" {
# Prints one converted word per line; return value is ignored.
this.fast.firstupper_list (arg1)
}
"tolower" {
this.fast.tolower_list (arg1)
}
else {
print ("Unknown command: {cmd}")
exit (1)
}
}
}
}
# ============================================
# CLI Setup using cli.ct
# ============================================
# Build the CLI application object used to print the help text.
# Registers one entry per command handled by Estrlist.run; the aliases that
# run also accepts (strip_spaces, uniq, is_empty) are not listed separately.
# Returns the configured app object.
func setup_cli () {
    app = new_app ("estrlist", "String list operations utility")
    app.with_version ("2.0")
    # Commands
    app.add_command (new_command ("strip", "Remove extra spaces"))
    app.add_command (new_command ("list", "Print words line by line"))
    app.add_command (new_command ("count", "Count words"))
    app.add_command (new_command ("first", "Get first word"))
    app.add_command (new_command ("last", "Get last word"))
    app.add_command (new_command ("union", "Sort and deduplicate"))
    app.add_command (new_command ("exclude", "list2 minus list1"))
    app.add_command (new_command ("intersection", "Common items"))
    app.add_command (new_command ("difference", "Symmetric difference"))
    app.add_command (new_command ("contains", "Check if word in list"))
    app.add_command (new_command ("has", "Grep match"))
    app.add_command (new_command ("match", "Egrep match"))
    app.add_command (new_command ("isempty", "Check if empty"))
    app.add_command (new_command ("has_space", "Check multiple words"))
    app.add_command (new_command ("reg_exclude", "Exclude by regex"))
    app.add_command (new_command ("reg_include", "Include by regex"))
    # Handled by Estrlist.run but previously missing from the help output.
    app.add_command (new_command ("reg_wordexclude", "Exclude words matching regex"))
    app.add_command (new_command ("reg_wordinclude", "Include words starting with regex"))
    app.add_command (new_command ("firstupper", "Capitalize first letter"))
    app.add_command (new_command ("tolower", "Lowercase"))
    return app
}
# Return whatever shell.capture yields for running `cat` with no arguments,
# i.e. the content of standard input.
func read_stdin () {
return shell.capture ("cat")
}
# Return the command-line argument at position `index`.
# - "" when fewer than index + 1 arguments were given
# - the stripped content of standard input when the argument is "-"
# - the argument itself otherwise
func get_arg (index) {
argc = args.count ()
if argc > index {
val = args.get (index)
if val == "-" {
# "-" means: take the list from stdin and normalize its spacing.
fast = FastOps ()
return fast.strip (read_stdin ())
}
return val
}
return ""
}
# ============================================
# Main
# ============================================
# Program entry: validate the arguments, handle the help switches and pass
# the command plus up to two list arguments to Estrlist.run.
# Without any argument the help text is shown and the exit status is 1;
# an explicit help request exits with 0.
func main () {
argc = args.count ()
if argc == 0 {
app = setup_cli ()
app.help ()
exit (1)
}
cmd = args.get (0)
# Handle help
if cmd == "-h" || cmd == "--help" || cmd == "help" {
app = setup_cli ()
app.help ()
exit (0)
}
# Missing arguments come back as "" from get_arg.
arg1 = get_arg (1)
arg2 = get_arg (2)
estrlist = Estrlist ()
estrlist.run (cmd, arg1, arg2)
}
# Script entry point.
main ()
# estrlist_noawk.ct - String list operations (NO @awk version)
# Pure ContenT/Bash implementation for benchmark comparison
# ============================================
# FastOps - Pure Bash operations (no @awk)
# ============================================
# Word-list helpers written without @awk, used as the comparison baseline.
# Every method treats its input as a space-separated list, iterates over
# text.split (" ") with foreach and skips empty pieces.
class FastOps {
# Return the words of `text` joined by single spaces (extra spaces removed).
func strip (text) {
result = []
foreach word in text.split (" ") {
if word != "" {
result.push (word)
}
}
return result.join (" ")
}
# Return the number of non-empty words in `text`.
func count (text) {
c = 0
foreach word in text.split (" ") {
if word != "" {
c = c + 1
}
}
return c
}
# Return the first non-empty word of `text`, or "" when there is none.
func first (text) {
foreach word in text.split (" ") {
if word != "" {
return word
}
}
return ""
}
# Return the last non-empty word of `text`, or "" when there is none.
func last (text) {
result = ""
foreach word in text.split (" ") {
if word != "" {
# Overwritten on every hit, so the final value is the last word.
result = word
}
}
return result
}
# Print every non-empty word of `text` via print; always returns "".
func list (text) {
foreach word in text.split (" ") {
if word != "" {
print (word)
}
}
return ""
}
# Return 1 when some piece of `text` equals `needle`, otherwise 0.
func has_word (needle, text) {
foreach word in text.split (" ") {
if word == needle {
return 1
}
}
return 0
}
# Return the words of `text` with repeats removed, keeping the order of
# first appearance (no sorting happens here).
func union (text) {
result = []
# seen is a dict used as a set of already emitted words.
seen = {}
foreach word in text.split (" ") {
if word != "" {
if seen.has (word) == false {
seen.set (word, 1)
result.push (word)
}
}
}
return result.join (" ")
}
# Return the words of `list2` that do not occur in `list1`.
func exclude (list1, list2) {
# Pass 1: record every word of list1.
excl = {}
foreach w in list1.split (" ") {
if w != "" {
excl.set (w, 1)
}
}
# Pass 2: keep the list2 words that were not recorded.
result = []
foreach w in list2.split (" ") {
if w != "" {
if excl.has (w) == false {
result.push (w)
}
}
}
return result.join (" ")
}
# Return the words of `list2` that also occur in `list1`, in list2 order.
func intersection (list1, list2) {
# Pass 1: record every word of list1.
lookup = {}
foreach w in list1.split (" ") {
if w != "" {
lookup.set (w, 1)
}
}
# Pass 2: keep the list2 words that were recorded.
result = []
foreach w in list2.split (" ") {
if w != "" {
if lookup.has (w) {
result.push (w)
}
}
}
return result.join (" ")
}
# Print every non-empty word of `text` after calling lower () on it;
# always returns "".
func tolower_list (text) {
foreach w in text.split (" ") {
if w != "" {
print (w.lower ())
}
}
return ""
}
# Print every non-empty word of `text` with upper () applied to its first
# character; always returns "".
# NOTE(review): the tail is taken with substr (1, 1000), so a hard-coded
# length of 1000 is requested — confirm behavior for longer words.
func firstupper_list (text) {
foreach w in text.split (" ") {
if w != "" {
first_char = w.substr (0, 1)
rest = w.substr (1, 1000)
print (first_char.upper () .. rest)
}
}
return ""
}
}
# ============================================
# SetOps - Set operations with sorting (no @awk)
# ============================================
# Set-style operations on space-separated word lists, without @awk.
# All methods delegate the list work to a FastOps instance stored in the
# `fast` field; union additionally sorts through external shell commands.
class SetOps {
# Holds the FastOps helper; assigned in the constructor.
fast = nil
construct () {
this.fast = FastOps ()
}
# Return the distinct words of `text` in sorted order, joined by single
# spaces; "" when there are no words.
func union (text) {
# Use sort command for sorting since no @awk
unique = this.fast.union (text)
if unique == "" {
return ""
}
# Sort using shell
# Pipeline: one word per line -> sort -> back to one line -> drop the
# trailing space.
# NOTE(review): `unique` is interpolated between single quotes in the
# command string; a word containing a single quote would break the
# quoting — confirm inputs are trusted or escape them.
sorted = shell.capture ("echo '{unique}' | tr ' ' '\\n' | sort | tr '\\n' ' ' | sed 's/ $//'")
return sorted
}
# Return the words of `list2` that are not in `list1`.
# With an empty `list1` nothing can be excluded, so list2 is only stripped.
func exclude (list1, list2) {
if list1 == "" {
return this.fast.strip (list2)
}
return this.fast.exclude (list1, list2)
}
# Return the words of `list2` that also occur in `list1`; "" when either
# list is empty.
func intersection (list1, list2) {
if list1 == "" || list2 == "" {
return ""
}
return this.fast.intersection (list1, list2)
}
# Return the symmetric difference: words only in `list1` followed by words
# only in `list2`, separated by one space when both parts are non-empty.
func difference (list1, list2) {
# exclude (a, b) yields "b without a", hence the swapped arguments.
only1 = this.exclude (list2, list1)
only2 = this.exclude (list1, list2)
when only1 {
"" {
return only2
}
else {
if only2 == "" {
return only1
}
return "{only1} {only2}"
}
}
}
}
# ============================================
# RegexOps - Regex-based operations
# ============================================
class RegexOps {
    func exclude (patterns, text) {
        # Return the words of text that match none of the patterns.
        # patterns and text are both split on a single space. Each pattern is
        # wrapped in ^ and $ so it is tested against the whole word.
        # Result: surviving words in original order, joined by single spaces.
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                matched = false
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        full_pat = "^{pat}$"
                        if regex.match (word, full_pat) {
                            matched = true
                            # One hit settles it; skip the remaining patterns.
                            break
                        }
                    }
                }
                if matched == false {
                    result.push (word)
                }
            }
        }
        return result.join (" ")
    }
    func include (patterns, text) {
        # Return the words of text that match at least one pattern.
        # Each pattern is wrapped in ^ and $ so it is tested against the whole
        # word. A word is pushed once, on its first matching pattern.
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        full_pat = "^{pat}$"
                        if regex.match (word, full_pat) {
                            result.push (word)
                            break
                        }
                    }
                }
            }
        }
        return result.join (" ")
    }
    func word_exclude (patterns, text) {
        # Return the words of text that match none of the patterns, with each
        # pattern handed to regex.match exactly as given (no anchors added).
        # NOTE(review): word_include prefixes its patterns with ^ while this
        # function adds nothing - confirm the asymmetry is intended.
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                matched = false
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        if regex.match (word, pat) {
                            matched = true
                            # One hit settles it; skip the remaining patterns.
                            break
                        }
                    }
                }
                if matched == false {
                    result.push (word)
                }
            }
        }
        return result.join (" ")
    }
    func word_include (patterns, text) {
        # Return the words of text that match at least one pattern, with each
        # pattern prefixed by ^ (start anchor only, no end anchor).
        # A word is pushed once, on its first matching pattern.
        result = []
        foreach word in text.split (" ") {
            if word != "" {
                foreach pat in patterns.split (" ") {
                    if pat != "" {
                        if regex.match (word, "^{pat}") {
                            result.push (word)
                            break
                        }
                    }
                }
            }
        }
        return result.join (" ")
    }
}
# ============================================
# Estrlist - Main application
# ============================================
class Estrlist {
    fast = nil
    sets = nil
    regex_ops = nil
    construct () {
        # One helper object per family of commands: plain list operations,
        # set operations, and regex filters.
        this.fast = FastOps ()
        this.sets = SetOps ()
        this.regex_ops = RegexOps ()
    }
    func run (cmd, arg1, arg2) {
        # Dispatch the command named by cmd to the matching helper.
        # Commands that produce a list print it. The predicate commands
        # (isempty, has_space, has, match, contains) print nothing and report
        # through the exit status instead: 0 for yes, 1 for no.
        # first and last print nothing when the helper returns an empty string.
        # has and match run the same check: arg1 is the pattern, arg2 the text.
        # An unrecognized command prints a message and exits with status 1.
        when cmd {
            "strip", "strip_spaces" {
                print (this.fast.strip (arg1))
            }
            "list" {
                this.fast.list (arg1)
            }
            "count" {
                print (this.fast.count (arg1))
            }
            "first" {
                head = this.fast.first (arg1)
                if head != "" {
                    print (head)
                }
            }
            "last" {
                tail = this.fast.last (arg1)
                if tail != "" {
                    print (tail)
                }
            }
            "union", "uniq" {
                print (this.sets.union (arg1))
            }
            "exclude" {
                print (this.sets.exclude (arg1, arg2))
            }
            "intersection" {
                print (this.sets.intersection (arg1, arg2))
            }
            "difference" {
                print (this.sets.difference (arg1, arg2))
            }
            "isempty", "is_empty" {
                normalized = this.fast.strip (arg1)
                if normalized == "" {
                    exit (0)
                }
                exit (1)
            }
            "has_space" {
                words = this.fast.count (arg1)
                if words > 1 {
                    exit (0)
                }
                exit (1)
            }
            "has", "match" {
                if regex.match (arg2, arg1) {
                    exit (0)
                }
                exit (1)
            }
            "contains" {
                if this.fast.has_word (arg1, arg2) == 1 {
                    exit (0)
                }
                exit (1)
            }
            "reg_exclude" {
                print (this.regex_ops.exclude (arg1, arg2))
            }
            "reg_include" {
                print (this.regex_ops.include (arg1, arg2))
            }
            "reg_wordexclude" {
                print (this.regex_ops.word_exclude (arg1, arg2))
            }
            "reg_wordinclude" {
                print (this.regex_ops.word_include (arg1, arg2))
            }
            "firstupper" {
                this.fast.firstupper_list (arg1)
            }
            "tolower" {
                this.fast.tolower_list (arg1)
            }
            else {
                print ("Unknown command: {cmd}")
                exit (1)
            }
        }
    }
}
# ============================================
# Main (simplified - no cli.ct dependency)
# ============================================
func read_stdin () {
    # Return whatever the external cat command captures, i.e. the text
    # arriving on standard input.
    piped = shell.capture ("cat")
    return piped
}
func get_arg (index) {
    # Fetch the command-line argument at position index.
    # Returns the empty string when fewer than index + 1 arguments were given.
    # An argument that is exactly a single dash means the value is read from
    # standard input and passed through FastOps.strip before being returned.
    available = args.count ()
    if available > index {
        raw = args.get (index)
        if raw == "-" {
            ops = FastOps ()
            piped = read_stdin ()
            return ops.strip (piped)
        }
        return raw
    }
    return ""
}
func main () {
    # Program entry: validate the argument count, answer help requests, then
    # hand the command and up to two list arguments to Estrlist.run.
    # With no arguments a usage line is printed and the exit status is 1.
    argc = args.count ()
    if argc == 0 {
        print ("Usage: estrlist_noawk <command> <arg1> [arg2]")
        exit (1)
    }
    cmd = args.get (0)
    if cmd == "-h" || cmd == "--help" || cmd == "help" {
        print ("estrlist_noawk - String list operations (no @awk)")
        print ("Commands: strip, list, count, first, last, union, exclude, intersection, difference")
        print (" contains, has, match, isempty, has_space, tolower, firstupper")
        # The regex commands are dispatched by Estrlist.run too, so list them.
        print (" reg_exclude, reg_include, reg_wordexclude, reg_wordinclude")
        exit (0)
    }
    # Missing arguments come back as empty strings; a single dash reads stdin.
    arg1 = get_arg (1)
    arg2 = get_arg (2)
    estrlist = Estrlist ()
    estrlist.run (cmd, arg1, arg2)
}
# Script entry point: run main when the file is executed.
main ()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment