DeFi-Simulation-Lab/script_validator.py at main · Arpit-R-Doshi/DeFi-Simulation-Lab · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
script_validator.py
===================
Validates and sanitizes generated Hardhat scripts before execution.
Blocks dangerous patterns: file system access, shell execution,
and arbitrary network calls. Destructive opcodes (selfdestruct)
are reported as warnings rather than blocked.
"""

import re
import logging
from typing import Dict, Any, List, Tuple

logger = logging.getLogger(__name__)

# ─────────────────────────────────────────────────────────────────── #
# BLOCK-LISTS                                                          #
# ─────────────────────────────────────────────────────────────────── #

# JS patterns that should never appear in generated scripts.
# Each entry is (regex, human-readable reason). validate_script() runs every
# regex with re.search(..., re.IGNORECASE) and records the reason as an error
# on a hit, so patterns must stay valid under case-insensitive matching.
_BLOCKED_JS_PATTERNS: List[Tuple[str, str]] = [
    # File system access
    (r'\brequire\s*\(\s*[\'"]fs[\'"]\s*\)',            "fs module access blocked"),
    (r'\bfs\s*\.\s*(readFile|writeFile|unlink|rm)',    "file system write/read blocked"),
    (r'\bprocess\.env\b',                              "process.env access blocked"),
    # Only process.exit(), process.exit(0) and process.exit(1) are tolerated.
    # The lookahead inspects the whole argument list, so multi-character
    # arguments such as `42` or `code` are rejected as well.
    (r'\bprocess\.exit\s*\((?!\s*(?:[01]\s*)?\))',     "arbitrary process.exit blocked"),

    # Shell / exec
    (r'\bchild_process\b',                             "child_process blocked"),
    (r'\bexec\s*\(',                                   "exec() blocked"),
    (r'\bspawn\s*\(',                                  "spawn() blocked"),
    (r'\bexecSync\s*\(',                               "execSync() blocked"),

    # Network (only the local node at 127.0.0.1:8545 is allowed).
    # The allowed authority must be followed by a URL/string terminator
    # (end of text, `/`, `?`, `#`, whitespace, a quote, a backtick or `)`).
    # Without that, "127.0.0.1:8545@evil.com" (userinfo trick) or
    # "127.0.0.1:85450" would pass as local.
    (r'https?://(?!127\.0\.0\.1:8545(?![^/?#\s\'"`)]))',
                                                       "external HTTP requests blocked"),
    (r'\bfetch\s*\(\s*[\'"]https?://',                 "fetch to external URL blocked"),
    (r'\baxios\b',                                     "axios network lib blocked"),

    # Dangerous eval / code execution
    (r'\beval\s*\(',                                   "eval() blocked"),
    (r'\bnew\s+Function\s*\(',                         "new Function() blocked"),

    # Wallet private key exfiltration
    (r'privateKey',                                    "private key reference blocked"),
    (r'mnemonic',                                      "mnemonic reference blocked"),
]

# Allowed Hardhat methods whitelist (RPC calls must be in this list).
# validate_script() extracts the first quoted argument of every
# `provider.send("...")` call and tests it for exact, case-sensitive
# membership in this set. Any other method name is reported as an error.
_ALLOWED_RPC_METHODS = {
    # hardhat_* methods
    "hardhat_mine",
    "hardhat_setBalance",
    "hardhat_impersonateAccount",
    "hardhat_stopImpersonatingAccount",
    "hardhat_setCode",
    "hardhat_setStorageAt",
    # evm_* methods
    "evm_mine",
    "evm_increaseTime",
    "evm_setNextBlockTimestamp",
    "evm_snapshot",
    "evm_revert",
    # eth_* methods
    "eth_getBalance",
    "eth_call",
    "eth_blockNumber",
    "eth_gasPrice",
}

# Max script size (bytes) — prevent huge injection attacks.
# validate_script() measures the UTF-8 encoded length of the script and
# records an error when it is strictly greater than this limit.
_MAX_SCRIPT_BYTES = 128_000


# ─────────────────────────────────────────────────────────────────── #
# VALIDATOR                                                            #
# ─────────────────────────────────────────────────────────────────── #

class ValidationError(Exception):
    """Signals that a script did not pass safety validation.

    Plain ``Exception`` subclass with no extra state or behaviour; the
    message passed to the constructor is the only payload.
    """


def validate_script(script: str, *, raise_on_error: bool = False) -> Dict[str, Any]:
    """
    Validate a generated Hardhat JS script for safety.

    Args:
        script:         JavaScript source text to inspect.
        raise_on_error: When True, raise ValidationError instead of
                        returning a result whose "valid" flag is False.
                        Defaults to False, so existing callers keep
                        receiving the result dict.

    Returns:
        {
          "valid":    bool,
          "errors":   [list of blocking issues],
          "warnings": [list of non-blocking notes],
          "stats":    {lines, bytes, rpc_calls}
        }

    Raises:
        ValidationError if `valid` is False and raise_on_error=True.
        The exception message is the error list joined with "; ".
    """
    errors:   List[str] = []
    warnings: List[str] = []

    # ── Size check ───────────────────────────────────────────────────
    byte_size = len(script.encode("utf-8"))
    if byte_size > _MAX_SCRIPT_BYTES:
        errors.append(f"Script too large: {byte_size:,} bytes (max {_MAX_SCRIPT_BYTES:,})")

    # ── Dangerous pattern scan ───────────────────────────────────────
    # Every block-list hit is reported, so one script can yield several
    # "Security violation" errors.
    for pattern, message in _BLOCKED_JS_PATTERNS:
        if re.search(pattern, script, re.IGNORECASE):
            errors.append(f"Security violation: {message}")

    # ── RPC method whitelist check ────────────────────────────────────
    # Captures the first quoted argument of each provider.send("...") call.
    rpc_calls_found = re.findall(
        r'provider\.send\s*\(\s*[\'"]([^\'"]+)[\'"]',
        script
    )
    non_whitelisted = [m for m in rpc_calls_found if m not in _ALLOWED_RPC_METHODS]
    if non_whitelisted:
        errors.append(f"Non-whitelisted RPC method(s): {', '.join(non_whitelisted)}")

    # ── Structural checks ────────────────────────────────────────────
    main_definition = "async function main()"
    if main_definition not in script:
        errors.append("Missing required async function main()")
    # The definition text itself contains "main()", so it must be removed
    # before looking for an actual call. Otherwise this check can never
    # fire when the definition is present.
    if "main()" not in script.replace(main_definition, ""):
        errors.append("Script never calls main()")
    # Accept both quote styles: require("hardhat") and require('hardhat').
    if not re.search(r'require\s*\(\s*[\'"]hardhat[\'"]\s*\)', script):
        warnings.append("Hardhat import not detected — script may not run in hardhat context")

    # ── Selfdestruct / dangerous opcodes ─────────────────────────────
    # Non-blocking: reported as a warning only.
    if re.search(r'\bselfdestruct\b', script, re.IGNORECASE):
        warnings.append("selfdestruct call detected — use with caution")

    # ── Stats ────────────────────────────────────────────────────────
    lines = script.count("\n") + 1

    result: Dict[str, Any] = {
        "valid":    len(errors) == 0,
        "errors":   errors,
        "warnings": warnings,
        "stats": {
            "lines":     lines,
            "bytes":     byte_size,
            "rpc_calls": len(rpc_calls_found),
        }
    }

    if raise_on_error and errors:
        raise ValidationError("; ".join(errors))
    return result


def sanitize_script(script: str) -> str:
    """
    Apply light sanitization to a script:
    - Remove BOM
    - Replace Windows (CRLF) and lone CR line endings with Unix LF
    - Strip trailing whitespace from every line
    - Drop one final trailing newline, if present

    Lines are split on "\\n" only. ``str.splitlines()`` would also break on
    form feed, vertical tab, NEL, U+2028, U+2029 and similar characters.
    Those are legal inside JS string literals, and turning them into raw
    newlines would corrupt the script.

    Args:
        script: Raw script text.

    Returns:
        The sanitized script text.
    """
    script = script.lstrip("\ufeff")        # BOM
    script = script.replace("\r\n", "\n")   # CRLF → LF
    script = script.replace("\r", "\n")     # lone CR → LF

    lines = script.split("\n")
    # split() leaves a trailing "" when the text ends with a newline (or is
    # empty); drop it so the result carries no final line terminator.
    if lines[-1] == "":
        lines.pop()
    return "\n".join(line.rstrip() for line in lines)


def validate_and_sanitize(script: str) -> Tuple[str, Dict[str, Any]]:
    """
    Run sanitize_script() on the input, then validate_script() on the
    cleaned text, logging the outcome.

    A passing script is logged at INFO level with its stats; a failing one
    is logged at WARNING level with its errors joined by "; ". Nothing is
    raised on failure — callers must inspect the returned result.

    Returns:
        (sanitized_script, validation_result)
    """
    cleaned = sanitize_script(script)
    report = validate_script(cleaned)

    if report["valid"]:
        stats = report["stats"]
        logger.info(
            "Script validation OK (%d lines, %d bytes, %d RPC calls)",
            stats["lines"],
            stats["bytes"],
            stats["rpc_calls"],
        )
    else:
        joined_errors = "; ".join(report["errors"])
        logger.warning("Script validation FAILED: %s", joined_errors)

    return cleaned, report