diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ced216a7..467fc2f1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -68,6 +68,7 @@ jobs: run: | python scripts/check_pragma_once.py include python scripts/check_undef.py src + python scripts/check_binding_retval.py - name: Unit Test with Coverage run: bash run_tests.sh - name: Upload coverage reports to Codecov diff --git a/README.md b/README.md index b0e7d9da..1664ded7 100644 --- a/README.md +++ b/README.md @@ -200,6 +200,19 @@ And these are the results of the primes benchmark on Intel i5-12400F, WSL (Ubunt Submit a pull request to add your project here. +## Code Quality + +pocketpy maintains code quality through automated checks: + +- **Binding Return Value Check**: Ensures all Python binding functions properly set return values + ```bash + python scripts/check_binding_retval.py + ``` +- **Pragma Once Check**: Validates header file include guards +- **Define/Undef Check**: Ensures balanced macro definitions + +These checks run automatically in CI/CD pipeline. + ## Contribution All kinds of contributions are welcome. diff --git a/docs/check_binding_retval.md b/docs/check_binding_retval.md new file mode 100644 index 00000000..dbd8d8db --- /dev/null +++ b/docs/check_binding_retval.md @@ -0,0 +1,175 @@ +# Binding Return Value Checker + +## Overview + +The `check_binding_retval.py` script is a static analysis tool that validates Python binding functions in the pocketpy codebase. It ensures that all functions bound to Python properly set return values before returning `true`. + +## Purpose + +In pocketpy's C API, when a binding function returns `true`, it indicates successful execution. According to the API conventions, the function MUST set a return value through one of these methods: + +1. **Direct assignment** using `py_new*` functions: + ```c + py_newint(py_retval(), 42); + return true; + ``` + +2. **Setting None** explicitly: + ```c + py_newnone(py_retval()); + return true; + ``` + +3. **Using assignment**: + ```c + py_assign(py_retval(), some_value); + return true; + ``` + +4. **Calling functions** that set `py_retval()` internally: + ```c + bool ok = py_import("module_name"); // Sets py_retval() on success + if(ok) return true; + ``` + +## Usage + +### Basic Usage + +Run the checker on default directories (`src/bindings` and `src/modules`): + +```bash +python scripts/check_binding_retval.py +``` + +### Verbose Mode + +Enable verbose output for debugging: + +```bash +python scripts/check_binding_retval.py --verbose +``` + +### Custom Directories + +Check specific directories: + +```bash +python scripts/check_binding_retval.py --dirs src/bindings src/modules src/custom +``` + +## Exit Codes + +- `0`: No issues found (success) +- `1`: Issues found (binding functions missing return value assignment) +- `2`: Script error (e.g., file access error) + +## Integration + +The checker is integrated into the CI/CD pipeline through the GitHub Actions workflow. It runs automatically on: + +- Push events +- Pull request events + +The check is part of the "Run Script Check" step in `.github/workflows/main.yml`. + +## What It Checks + +The tool analyzes all functions that are bound to Python through: + +- `py_bindfunc()` +- `py_bind()` +- `py_bindmagic()` +- `py_bindmethod()` +- `py_bindproperty()` + +For each bound function, it verifies that: + +1. If the function contains `return true` statements +2. There are corresponding assignments to `py_retval()` +3. Comments are excluded from analysis (to avoid false positives) + +## Functions That Set py_retval() Internally + +The checker recognizes these functions that internally set `py_retval()`: + +- `py_import()` - Sets py_retval() to the imported module on success +- `py_call()` - Sets py_retval() to the call result +- `py_iter()` - Sets py_retval() to the iterator +- `py_str()` - Sets py_retval() to string representation +- `py_repr()` - Sets py_retval() to repr string +- `py_getattr()` - Sets py_retval() to attribute value +- `py_next()` - Sets py_retval() to next value +- `py_getitem()` - Sets py_retval() to the item +- `py_vectorcall()` - Sets py_retval() to call result + +## Example Issues + +### Issue: Missing Return Value + +```c +static bool my_function(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + // Do some work... + return true; // BUG: Should set py_retval() before returning! +} +``` + +**Fix:** + +```c +static bool my_function(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + // Do some work... + py_newnone(py_retval()); // Set return value to None + return true; +} +``` + +### Correct: Using py_new* Functions + +```c +static bool add_numbers(int argc, py_Ref argv) { + PY_CHECK_ARGC(2); + py_i64 a = py_toint(py_arg(0)); + py_i64 b = py_toint(py_arg(1)); + py_newint(py_retval(), a + b); // Sets return value + return true; +} +``` + +### Correct: Calling Functions That Set py_retval() + +```c +static bool import_module(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + int res = py_import(py_tostr(py_arg(0))); // Sets py_retval() on success + if(res == -1) return false; + if(res) return true; // py_retval() already set by py_import + return ImportError("module not found"); +} +``` + +## False Positives + +The checker may occasionally report false positives for: + +1. **Helper functions** that are bound but shouldn't set return values (rare) +2. **Complex control flow** where the return value is set conditionally + +If you encounter a false positive, verify that: +1. The function is actually a Python-bound callable +2. The return value is properly set in all code paths leading to `return true` + +## Maintenance + +When adding new patterns or functions that set `py_retval()` internally: + +1. Update the `RETVAL_SETTING_FUNCTIONS` set in `check_binding_retval.py` +2. Add corresponding test cases +3. Update this documentation + +## Related Documentation + +- [pocketpy C API Documentation](https://pocketpy.dev/c-api/) +- [Binding Functions Guide](https://pocketpy.dev/c-api/bindings/) diff --git a/run_tests.sh b/run_tests.sh index 6b2c46ef..5cdb79b5 100644 --- a/run_tests.sh +++ b/run_tests.sh @@ -2,6 +2,10 @@ set -e python prebuild.py +# Run static analysis checks +echo "Running static analysis checks..." +python scripts/check_binding_retval.py || { echo "Binding return value check failed"; exit 1; } + SRC=$(find src/ -name "*.c") clang -std=c11 --coverage -O1 -Wfatal-errors -o main src2/main.c $SRC -Iinclude -DPK_ENABLE_OS=1 -lm -ldl -DNDEBUG diff --git a/scripts/check_binding_retval.py b/scripts/check_binding_retval.py new file mode 100755 index 00000000..1c078e45 --- /dev/null +++ b/scripts/check_binding_retval.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python3 +""" +Static analysis tool to check Python binding functions for missing py_retval() assignments. + +This tool checks whether Python binding functions properly set return values before returning true. +According to pocketpy conventions, when a binding function returns true, it MUST either: +1. Assign a value to py_retval() using py_new* functions, py_assign, etc. +2. Set the return value to None using py_newnone(py_retval()) +3. Call a function that sets py_retval() internally (like py_import, py_call, py_iter, etc.) + +Usage: + python scripts/check_binding_retval.py [--verbose] + +Exit codes: + 0: No issues found + 1: Issues found + 2: Script error +""" + +import os +import re +import sys +import argparse +from typing import List, Dict, Tuple, Set + +# Functions that set py_retval() internally +RETVAL_SETTING_FUNCTIONS = { + 'py_import', # Sets py_retval() on success + 'py_call', # Sets py_retval() with result + 'py_iter', # Sets py_retval() with iterator + 'py_str', # Sets py_retval() with string representation + 'py_repr', # Sets py_retval() with repr string + 'py_getattr', # Sets py_retval() with attribute value + 'py_next', # Sets py_retval() with next value + 'py_getitem', # Sets py_retval() with item + 'py_vectorcall', # Sets py_retval() with call result +} + +# Patterns that indicate py_retval() is being set +RETVAL_PATTERNS = [ + r'py_retval\(\)', # Direct py_retval() usage + r'py_new\w+\s*\(\s*py_retval\(\)', # py_newint(py_retval(), ...) + r'py_assign\s*\(\s*py_retval\(\)', # py_assign(py_retval(), ...) + r'\*py_retval\(\)\s*=', # *py_retval() = ... +] + + +class BindingChecker: + """Checker for Python binding functions.""" + + def __init__(self, verbose: bool = False): + self.verbose = verbose + self.issues: List[Dict] = [] + + def log(self, message: str): + """Log message if verbose mode is enabled.""" + if self.verbose: + print(f"[DEBUG] {message}") + + def find_c_files(self, *directories: str) -> List[str]: + """Find all .c files in the given directories.""" + c_files = [] + for directory in directories: + if not os.path.exists(directory): + self.log(f"Directory not found: {directory}") + continue + for root, _, files in os.walk(directory): + for file in files: + if file.endswith('.c'): + c_files.append(os.path.join(root, file)) + return c_files + + def extract_functions(self, content: str) -> Dict[str, Dict]: + """Extract all bool-returning functions from C code.""" + # Pattern to match bool-returning functions + pattern = r'(?:static\s+)?bool\s+(\w+)\s*\(([^)]*)\)\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}' + + functions = {} + for match in re.finditer(pattern, content, re.MULTILINE | re.DOTALL): + func_name = match.group(1) + func_params = match.group(2) + func_body = match.group(3) + full_func = match.group(0) + + functions[func_name] = { + 'params': func_params, + 'body': func_body, + 'full': full_func, + 'start_pos': match.start(), + } + + return functions + + def get_bound_functions(self, content: str) -> Set[str]: + """Find functions that are bound as Python callables.""" + bound_funcs = set() + + # Binding patterns used in pocketpy + patterns = [ + r'py_bindfunc\s*\([^,]+,\s*"[^"]+",\s*(\w+)\)', + r'py_bind\s*\([^,]+,\s*"[^"]*",\s*(\w+)\)', + r'py_bindmagic\s*\([^,]+,\s*\w+,\s*(\w+)\)', + r'py_bindmethod\s*\([^,]+,\s*"[^"]+",\s*(\w+)\)', + r'py_bindproperty\s*\([^,]+,\s*"[^"]+",\s*(\w+)', + ] + + for pattern in patterns: + for match in re.finditer(pattern, content): + func_name = match.group(1) + bound_funcs.add(func_name) + self.log(f"Found bound function: {func_name}") + + return bound_funcs + + def remove_comments(self, code: str) -> str: + """Remove C-style comments from code.""" + # Remove single-line comments + code = re.sub(r'//.*?$', '', code, flags=re.MULTILINE) + # Remove multi-line comments + code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL) + return code + + def has_retval_usage(self, func_body: str) -> bool: + """Check if function body uses py_retval() in any form.""" + # Remove comments to avoid false positives + code_without_comments = self.remove_comments(func_body) + + # Check for direct patterns + for pattern in RETVAL_PATTERNS: + if re.search(pattern, code_without_comments): + return True + + # Check for functions that set py_retval internally + for func in RETVAL_SETTING_FUNCTIONS: + if func + '(' in code_without_comments: + return True + + return False + + def analyze_return_statements(self, func_body: str, func_name: str) -> List[Dict]: + """Analyze return true statements in the function.""" + lines = func_body.split('\n') + suspicious_returns = [] + + for i, line in enumerate(lines): + # Look for "return true" statements + if re.search(r'\breturn\s+true\b', line): + # Get context (10 lines before the return) + start = max(0, i - 10) + context_lines = lines[start:i+1] + context = '\n'.join(context_lines) + + suspicious_returns.append({ + 'line_num': i + 1, + 'line': line.strip(), + 'context': context, + }) + + return suspicious_returns + + def check_function(self, func_name: str, func_info: Dict, filepath: str) -> bool: + """ + Check if a bound function properly sets py_retval() before returning true. + Returns True if there's an issue, False otherwise. + """ + func_body = func_info['body'] + + # Skip if function doesn't return true + if 'return true' not in func_body: + self.log(f"Function {func_name} doesn't return true, skipping") + return False + + # Check if function has any py_retval usage + if self.has_retval_usage(func_body): + self.log(f"Function {func_name} uses py_retval(), OK") + return False + + # Found a potential issue + self.log(f"Function {func_name} returns true without py_retval()!") + + suspicious_returns = self.analyze_return_statements(func_body, func_name) + + issue = { + 'file': filepath, + 'function': func_name, + 'full_code': func_info['full'], + 'suspicious_returns': suspicious_returns, + } + + self.issues.append(issue) + return True + + def check_file(self, filepath: str) -> int: + """Check all bound functions in a file.""" + self.log(f"Checking file: {filepath}") + + try: + with open(filepath, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + except Exception as e: + print(f"Error reading {filepath}: {e}", file=sys.stderr) + return 0 + + # Extract functions and find bound ones + functions = self.extract_functions(content) + bound_funcs = self.get_bound_functions(content) + + if not bound_funcs: + self.log(f"No bound functions found in {filepath}") + return 0 + + issues_count = 0 + for func_name in bound_funcs: + if func_name not in functions: + self.log(f"Bound function {func_name} not found in extracted functions") + continue + + if self.check_function(func_name, functions[func_name], filepath): + issues_count += 1 + + return issues_count + + def check_directories(self, *directories: str) -> int: + """Check all C files in the given directories.""" + c_files = self.find_c_files(*directories) + + if not c_files: + print("No C files found to check", file=sys.stderr) + return 0 + + self.log(f"Found {len(c_files)} C files to check") + + total_issues = 0 + for filepath in c_files: + issues = self.check_file(filepath) + total_issues += issues + + return total_issues + + def print_report(self): + """Print a detailed report of all issues found.""" + if not self.issues: + print("✓ No issues found! All Python binding functions properly set py_retval().") + return + + print(f"\n{'='*80}") + print(f"Found {len(self.issues)} function(s) with potential issues:") + print(f"{'='*80}\n") + + for i, issue in enumerate(self.issues, 1): + print(f"Issue #{i}:") + print(f" File: {issue['file']}") + print(f" Function: {issue['function']}") + print(f" Problem: Function returns true but doesn't set py_retval()") + print(f"\n Function code:") + print(" " + "-" * 76) + for line in issue['full_code'].split('\n'): + print(f" {line}") + print(" " + "-" * 76) + + if issue['suspicious_returns']: + print(f"\n Found {len(issue['suspicious_returns'])} 'return true' statement(s):") + for ret in issue['suspicious_returns']: + print(f" Line {ret['line_num']}: {ret['line']}") + + print(f"\n{'='*80}\n") + + +def main(): + parser = argparse.ArgumentParser( + description='Check Python binding functions for missing py_retval() assignments', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__ + ) + parser.add_argument( + '--verbose', '-v', + action='store_true', + help='Enable verbose output for debugging' + ) + parser.add_argument( + '--dirs', + nargs='+', + default=['src/bindings', 'src/modules'], + help='Directories to check (default: src/bindings src/modules)' + ) + + args = parser.parse_args() + + # Create checker and run analysis + checker = BindingChecker(verbose=args.verbose) + + print("Checking Python binding functions for missing py_retval() assignments...") + print(f"Target directories: {', '.join(args.dirs)}") + print() + + try: + total_issues = checker.check_directories(*args.dirs) + checker.print_report() + + # Exit with appropriate code + sys.exit(1 if total_issues > 0 else 0) + + except Exception as e: + print(f"\nError during analysis: {e}", file=sys.stderr) + if args.verbose: + import traceback + traceback.print_exc() + sys.exit(2) + + +if __name__ == '__main__': + main() diff --git a/tests/test_check_binding_retval.py b/tests/test_check_binding_retval.py new file mode 100644 index 00000000..a9b1a790 --- /dev/null +++ b/tests/test_check_binding_retval.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +""" +Test suite for check_binding_retval.py script. + +This test verifies that the binding return value checker correctly identifies +issues in Python binding functions. +""" + +import os +import sys +import tempfile +import subprocess +from pathlib import Path + +# Get the repository root +REPO_ROOT = Path(__file__).parent.parent +CHECKER_SCRIPT = REPO_ROOT / "scripts" / "check_binding_retval.py" + + +def run_checker(test_dir): + """Run the checker on a test directory and return the exit code and output.""" + result = subprocess.run( + [sys.executable, str(CHECKER_SCRIPT), "--dirs", test_dir], + capture_output=True, + text=True + ) + return result.returncode, result.stdout, result.stderr + + +def test_correct_binding(): + """Test that correct bindings pass validation.""" + with tempfile.TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test_correct.c" + test_file.write_text(""" +#include "pocketpy/pocketpy.h" + +// Correct: sets py_retval() with py_newint +static bool correct_function_1(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + py_newint(py_retval(), 42); + return true; +} + +// Correct: sets py_retval() with py_newnone +static bool correct_function_2(int argc, py_Ref argv) { + PY_CHECK_ARGC(0); + py_newnone(py_retval()); + return true; +} + +// Correct: uses py_import which sets py_retval() +static bool correct_function_3(int argc, py_Ref argv) { + int res = py_import("test"); + if(res == 1) return true; + return false; +} + +void register_correct() { + py_GlobalRef mod = py_newmodule("test"); + py_bindfunc(mod, "f1", correct_function_1); + py_bindfunc(mod, "f2", correct_function_2); + py_bindfunc(mod, "f3", correct_function_3); +} +""") + + exit_code, stdout, stderr = run_checker(tmpdir) + assert exit_code == 0, f"Expected exit code 0, got {exit_code}\n{stdout}\n{stderr}" + assert "No issues found" in stdout, f"Expected success message\n{stdout}" + print("✓ test_correct_binding passed") + + +def test_incorrect_binding(): + """Test that incorrect bindings are detected.""" + with tempfile.TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test_incorrect.c" + test_file.write_text(""" +#include "pocketpy/pocketpy.h" + +// Incorrect: returns true without setting py_retval() +static bool incorrect_function(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + // Missing py_retval() assignment + return true; +} + +void register_incorrect() { + py_GlobalRef mod = py_newmodule("test"); + py_bindfunc(mod, "bad", incorrect_function); +} +""") + + exit_code, stdout, stderr = run_checker(tmpdir) + assert exit_code == 1, f"Expected exit code 1, got {exit_code}\n{stdout}\n{stderr}" + assert "incorrect_function" in stdout, f"Expected to find function name\n{stdout}" + assert "potential issues" in stdout, f"Expected issues message\n{stdout}" + print("✓ test_incorrect_binding passed") + + +def test_comments_ignored(): + """Test that comments mentioning py_retval() don't cause false negatives.""" + with tempfile.TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test_comments.c" + test_file.write_text(""" +#include "pocketpy/pocketpy.h" + +// This function has comments about py_retval() but doesn't actually set it +static bool buggy_function(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + // TODO: Should call py_retval() here + /* py_newint(py_retval(), 42); */ + return true; // BUG: Missing actual py_retval() +} + +void register_buggy() { + py_GlobalRef mod = py_newmodule("test"); + py_bindfunc(mod, "bug", buggy_function); +} +""") + + exit_code, stdout, stderr = run_checker(tmpdir) + assert exit_code == 1, f"Expected exit code 1, got {exit_code}\n{stdout}\n{stderr}" + assert "buggy_function" in stdout, f"Expected to find function name\n{stdout}" + print("✓ test_comments_ignored passed") + + +def test_actual_codebase(): + """Test that the actual codebase passes validation.""" + src_bindings = REPO_ROOT / "src" / "bindings" + src_modules = REPO_ROOT / "src" / "modules" + + if not src_bindings.exists() or not src_modules.exists(): + print("⊘ test_actual_codebase skipped (directories not found)") + return + + exit_code, stdout, stderr = run_checker(str(REPO_ROOT / "src")) + assert exit_code == 0, f"Actual codebase should pass validation\n{stdout}\n{stderr}" + print("✓ test_actual_codebase passed") + + +def main(): + """Run all tests.""" + print("Running tests for check_binding_retval.py...") + print("=" * 80) + + tests = [ + test_correct_binding, + test_incorrect_binding, + test_comments_ignored, + test_actual_codebase, + ] + + failed = 0 + for test in tests: + try: + test() + except AssertionError as e: + print(f"✗ {test.__name__} failed: {e}") + failed += 1 + except Exception as e: + print(f"✗ {test.__name__} error: {e}") + failed += 1 + + print("=" * 80) + if failed == 0: + print(f"All {len(tests)} tests passed!") + return 0 + else: + print(f"{failed}/{len(tests)} tests failed!") + return 1 + + +if __name__ == "__main__": + sys.exit(main())