From 991b8673a523da4535f9be4ce11c8af407a2994d Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Sat, 30 Aug 2025 22:26:20 +0100 Subject: [PATCH 1/4] feat: add SystemVerilog backend generator Implements a new backend generator for SystemVerilog output, matching the exact format used by riscv-opcodes/inst.sverilog. This provides direct compatibility with hardware designs using the riscv-opcodes SystemVerilog package format. Features: - Generates SystemVerilog package with instruction and CSR definitions - Outputs 32-bit instruction patterns with proper bit encoding - Handles compressed (16-bit) instructions correctly - Supports all standard RISC-V extensions - Integrated with the ./do build system as gen:sverilog task The generator produces output identical to riscv-opcodes format: - Instructions as 'localparam [31:0] NAME = 32'bpattern' - CSRs as 'localparam logic [11:0] CSR_NAME = 12'haddr' - Proper alignment and formatting for readability Tested against riscv-opcodes/inst.sverilog to ensure format compatibility. Signed-off-by: Afonso Oliveira --- .../generators/sverilog/sverilog_generator.py | 179 ++++++++++++++++++ backends/generators/tasks.rake | 25 +++ 2 files changed, 204 insertions(+) create mode 100644 backends/generators/sverilog/sverilog_generator.py diff --git a/backends/generators/sverilog/sverilog_generator.py b/backends/generators/sverilog/sverilog_generator.py new file mode 100644 index 0000000000..c508bef700 --- /dev/null +++ b/backends/generators/sverilog/sverilog_generator.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 + +import argparse +import os +import sys +import logging +from pathlib import Path + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from generator import load_instructions, load_csrs + + +def format_instruction_name(name): + """Format instruction name for SystemVerilog (uppercase with underscores).""" + # Handle compressed instructions + if name.startswith("c."): + name = "C_" + name[2:] + # Replace dots with underscores and convert to uppercase + return name.replace(".", "_").upper() + + +def format_csr_name(name): + """Format CSR name for SystemVerilog (uppercase with underscores).""" + return "CSR_" + name.replace(".", "_").upper() + + +def match_to_sverilog_bits(match_str, is_compressed=False): + """Convert a match string to SystemVerilog bit pattern.""" + if not match_str: + return "32'b" + "?" * 32 + + # For compressed instructions (16-bit), we need to handle them differently + # The riscv-opcodes format puts the 16-bit pattern in the lower 16 bits + # with the upper 16 bits as wildcards + if is_compressed or len(match_str) == 16: + # Pad with wildcards on the left for 16-bit instructions + match_str = "?" * 16 + match_str + elif len(match_str) < 32: + # For other cases, pad on the right + match_str = match_str + "-" * (32 - len(match_str)) + + # Convert to SystemVerilog format (0, 1, or ?) + result = [] + for bit in match_str: + if bit == "0": + result.append("0") + elif bit == "1": + result.append("1") + else: # '-' or any other character + result.append("?") + + return "32'b" + "".join(result) + + +def generate_sverilog(instructions, csrs, output_file): + """Generate SystemVerilog package file.""" + with open(output_file, "w") as f: + # Write header + f.write("\n/* Automatically generated by parse_opcodes */\n") + f.write("package riscv_instr;\n") + + # Find the maximum name length for alignment + max_instr_len = max((len(format_instruction_name(name)) for name in instructions.keys()), default=0) + max_csr_len = max((len(format_csr_name(csrs[addr])) for addr in csrs.keys()), default=0) + max_len = max(max_instr_len, max_csr_len) + + # Write instruction parameters + for name in sorted(instructions.keys()): + encoding = instructions[name] + sv_name = format_instruction_name(name) + # Pad the name for alignment + padded_name = sv_name.ljust(max_len) + + # Get the match pattern + if isinstance(encoding, dict) and "match" in encoding: + match = encoding["match"] + else: + # If no match field, use all wildcards + match = "-" * 32 + + # Check if this is a compressed instruction + is_compressed = name.startswith("c.") + sv_bits = match_to_sverilog_bits(match, is_compressed) + f.write(f" localparam [31:0] {padded_name} = {sv_bits};\n") + + # Write CSR parameters + # CSRs are returned as {address: name} by load_csrs + for addr in sorted(csrs.keys()): + csr_name = csrs[addr] + sv_name = format_csr_name(csr_name) + # Pad the name for alignment + padded_name = sv_name.ljust(max_len) + + # Format CSR address as 12-bit hex + f.write(f" localparam logic [11:0] {padded_name} = 12'h{addr:03x};\n") + + # Write footer + f.write("\nendpackage\n") + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Generate SystemVerilog package from RISC-V instruction definitions" + ) + parser.add_argument( + "--inst-dir", + default="../../../gen/resolved_spec/_/inst/", + help="Directory containing instruction YAML files", + ) + parser.add_argument( + "--csr-dir", + default="../../../gen/resolved_spec/_/csr/", + help="Directory containing CSR YAML files", + ) + parser.add_argument( + "--output", + default="inst.sverilog", + help="Output SystemVerilog file name" + ) + parser.add_argument( + "--extensions", + default="A,D,F,I,M,Q,Zba,Zbb,Zbs,S,System,V,Zicsr,Smpmp,Sm,H,U,Zicntr,Zihpm,Smhpm", + help="Comma-separated list of enabled extensions. Default includes standard extensions.", + ) + parser.add_argument( + "--arch", + default="RV64", + choices=["RV32", "RV64", "BOTH"], + help="Target architecture (RV32, RV64, or BOTH). Default is RV64.", + ) + parser.add_argument( + "--verbose", "-v", action="store_true", help="Enable verbose logging" + ) + parser.add_argument( + "--include-all", + action="store_true", + help="Include all instructions and CSRs regardless of extensions", + ) + return parser.parse_args() + + +def main(): + args = parse_args() + + # Set up logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig(level=log_level, format="%(levelname)s:: %(message)s") + + # Parse extensions + if args.include_all: + enabled_extensions = [] + logging.info("Including all instructions and CSRs (ignoring extension filter)") + else: + enabled_extensions = [ext.strip() for ext in args.extensions.split(",")] + logging.info(f"Enabled extensions: {', '.join(enabled_extensions)}") + + logging.info(f"Target architecture: {args.arch}") + + # Load instructions + instructions = load_instructions( + args.inst_dir, enabled_extensions, args.include_all, args.arch + ) + logging.info(f"Loaded {len(instructions)} instructions") + + # Load CSRs + csrs = load_csrs(args.csr_dir, enabled_extensions, args.include_all, args.arch) + logging.info(f"Loaded {len(csrs)} CSRs") + + # Generate the SystemVerilog file + generate_sverilog(instructions, csrs, args.output) + logging.info( + f"Generated {args.output} with {len(instructions)} instructions and {len(csrs)} CSRs" + ) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backends/generators/tasks.rake b/backends/generators/tasks.rake index 2049f8e4e9..e26829ad9e 100644 --- a/backends/generators/tasks.rake +++ b/backends/generators/tasks.rake @@ -6,6 +6,7 @@ require 'tempfile' directory "#{$root}/gen/go" directory "#{$root}/gen/c_header" +directory "#{$root}/gen/sverilog" namespace :gen do desc <<~DESC @@ -87,4 +88,28 @@ namespace :gen do resolved_codes_file.unlink end end + + desc <<~DESC + Generate SystemVerilog package from RISC-V instruction and CSR definitions + + Options: + * CONFIG - Configuration name (defaults to "_") + * OUTPUT_DIR - Output directory for generated SystemVerilog code (defaults to "#{$root}/gen/sverilog") + DESC + task sverilog: "#{$root}/gen/sverilog" do + config_name = ENV["CONFIG"] || "_" + output_dir = ENV["OUTPUT_DIR"] || "#{$root}/gen/sverilog/" + + # Ensure the output directory exists + FileUtils.mkdir_p output_dir + + # Get the arch paths based on the config + resolver = Udb::Resolver.new + cfg_arch = resolver.cfg_arch_for(config_name) + inst_dir = cfg_arch.path / "inst" + csr_dir = cfg_arch.path / "csr" + + # Run the SystemVerilog generator script using the same Python environment + sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/sverilog/sverilog_generator.py --inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --output=#{output_dir}inst.sverilog" + end end From 123f65d4e23c26721be6f5ca346883381de63c1f Mon Sep 17 00:00:00 2001 From: AliAlaa88 Date: Mon, 15 Sep 2025 09:29:11 +0000 Subject: [PATCH 2/4] refactor: update SystemVerilog generator for improved match handling and output file naming --- .../generators/sverilog/sverilog_generator.py | 77 +++++++++---------- backends/generators/tasks.rake | 2 +- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/backends/generators/sverilog/sverilog_generator.py b/backends/generators/sverilog/sverilog_generator.py index c508bef700..cf0b8f4744 100644 --- a/backends/generators/sverilog/sverilog_generator.py +++ b/backends/generators/sverilog/sverilog_generator.py @@ -14,9 +14,6 @@ def format_instruction_name(name): """Format instruction name for SystemVerilog (uppercase with underscores).""" - # Handle compressed instructions - if name.startswith("c."): - name = "C_" + name[2:] # Replace dots with underscores and convert to uppercase return name.replace(".", "_").upper() @@ -26,31 +23,26 @@ def format_csr_name(name): return "CSR_" + name.replace(".", "_").upper() -def match_to_sverilog_bits(match_str, is_compressed=False): +def match_to_sverilog_bits(match_str): """Convert a match string to SystemVerilog bit pattern.""" if not match_str: - return "32'b" + "?" * 32 - - # For compressed instructions (16-bit), we need to handle them differently - # The riscv-opcodes format puts the 16-bit pattern in the lower 16 bits + logging.error(f"Empty match string encountered.") + # return "32'b" + "?" * 32 + + # For compressed instructions (16-bit), we need to handle them differently. + # The 16-bit pattern is in the lower 16 bits, # with the upper 16 bits as wildcards - if is_compressed or len(match_str) == 16: + if len(match_str) == 16: # Pad with wildcards on the left for 16-bit instructions match_str = "?" * 16 + match_str elif len(match_str) < 32: # For other cases, pad on the right - match_str = match_str + "-" * (32 - len(match_str)) - + logging.error(f"Match string length is {len(match_str)}, expected 32 or 16.") + # match_str = match_str + "-" * (32 - len(match_str)) + # Convert to SystemVerilog format (0, 1, or ?) - result = [] - for bit in match_str: - if bit == "0": - result.append("0") - elif bit == "1": - result.append("1") - else: # '-' or any other character - result.append("?") - + result = match_str.replace("-", "?") + return "32'b" + "".join(result) @@ -58,33 +50,40 @@ def generate_sverilog(instructions, csrs, output_file): """Generate SystemVerilog package file.""" with open(output_file, "w") as f: # Write header - f.write("\n/* Automatically generated by parse_opcodes */\n") + f.write("\n/* Automatically generated by UDB */\n") f.write("package riscv_instr;\n") - + # Find the maximum name length for alignment - max_instr_len = max((len(format_instruction_name(name)) for name in instructions.keys()), default=0) - max_csr_len = max((len(format_csr_name(csrs[addr])) for addr in csrs.keys()), default=0) + max_instr_len = max( + (len(format_instruction_name(name)) for name in instructions.keys()), + default=0, + ) + max_csr_len = max( + (len(format_csr_name(csrs[addr])) for addr in csrs.keys()), default=0 + ) max_len = max(max_instr_len, max_csr_len) - + # Write instruction parameters for name in sorted(instructions.keys()): encoding = instructions[name] sv_name = format_instruction_name(name) # Pad the name for alignment padded_name = sv_name.ljust(max_len) - + # if not name.startswith("v"): + # logging.info(f"Processing instruction: {name}") + # logging.info(f"Formatted instruction: {sv_name}") + # logging.info(f"Padded instruction: {padded_name}") + # Get the match pattern if isinstance(encoding, dict) and "match" in encoding: match = encoding["match"] else: # If no match field, use all wildcards - match = "-" * 32 - - # Check if this is a compressed instruction - is_compressed = name.startswith("c.") - sv_bits = match_to_sverilog_bits(match, is_compressed) - f.write(f" localparam [31:0] {padded_name} = {sv_bits};\n") - + logging.warning(f"No match field for instruction {name}.") + + sv_bits = match_to_sverilog_bits(match) + f.write(f" localparam logic [31:0] {padded_name} = {sv_bits};\n") + # Write CSR parameters # CSRs are returned as {address: name} by load_csrs for addr in sorted(csrs.keys()): @@ -92,10 +91,10 @@ def generate_sverilog(instructions, csrs, output_file): sv_name = format_csr_name(csr_name) # Pad the name for alignment padded_name = sv_name.ljust(max_len) - + # Format CSR address as 12-bit hex f.write(f" localparam logic [11:0] {padded_name} = 12'h{addr:03x};\n") - + # Write footer f.write("\nendpackage\n") @@ -115,9 +114,9 @@ def parse_args(): help="Directory containing CSR YAML files", ) parser.add_argument( - "--output", - default="inst.sverilog", - help="Output SystemVerilog file name" + "--output", + default="riscv_decode_package.svh", + help="Output SystemVerilog file name", ) parser.add_argument( "--extensions", @@ -176,4 +175,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/backends/generators/tasks.rake b/backends/generators/tasks.rake index e26829ad9e..8f38d1c32f 100644 --- a/backends/generators/tasks.rake +++ b/backends/generators/tasks.rake @@ -110,6 +110,6 @@ namespace :gen do csr_dir = cfg_arch.path / "csr" # Run the SystemVerilog generator script using the same Python environment - sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/sverilog/sverilog_generator.py --inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --output=#{output_dir}inst.sverilog" + sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/sverilog/sverilog_generator.py --inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --output=#{output_dir}riscv_decode_package.svh" end end From 6de42322bb6434bd17fa04752663bc3162175c17 Mon Sep 17 00:00:00 2001 From: AliAlaa88 Date: Mon, 15 Sep 2025 21:49:19 +0000 Subject: [PATCH 3/4] fix: using localparam logic for exception causes for consistency with the other items in the package --- .github/workflows/regress.yml | 11 ++ .../generators/c_header/generate_encoding.py | 121 +--------------- backends/generators/generator.py | 132 ++++++++++++++++-- .../generators/sverilog/sverilog_generator.py | 74 ++++++---- backends/generators/tasks.rake | 79 ++++++----- 5 files changed, 233 insertions(+), 184 deletions(-) diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml index 45420910b2..b98007bd29 100755 --- a/.github/workflows/regress.yml +++ b/.github/workflows/regress.yml @@ -190,6 +190,17 @@ jobs: uses: ./.github/actions/singularity-setup - name: Generate c_header code run: ./do gen:c_header + regress-gen-sverilog: + runs-on: ubuntu-latest + env: + SINGULARITY: 1 + steps: + - name: Clone Github Repo Action + uses: actions/checkout@v4 + - name: singularity setup + uses: ./.github/actions/singularity-setup + - name: Generate sverilog_header code + run: ./do gen:sverilog regress-cpp-unit: runs-on: ubuntu-latest env: diff --git a/backends/generators/c_header/generate_encoding.py b/backends/generators/c_header/generate_encoding.py index aba6592511..022942d8c6 100644 --- a/backends/generators/c_header/generate_encoding.py +++ b/backends/generators/c_header/generate_encoding.py @@ -8,7 +8,6 @@ import logging import argparse import yaml -import json # Add parent directory to path to import generator.py parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -18,6 +17,7 @@ from generator import ( load_instructions, load_csrs, + load_exception_codes, parse_match, parse_extension_requirements, ) @@ -30,125 +30,6 @@ def calculate_mask(match_str): return int("".join("0" if c == "-" else "1" for c in match_str), 2) -def load_exception_codes( - ext_dir, enabled_extensions=None, include_all=False, resolved_codes_file=None -): - """Load exception codes from extension YAML files or pre-resolved JSON file.""" - exception_codes = [] - found_extensions = 0 - found_files = 0 - - if enabled_extensions is None: - enabled_extensions = [] - - # If we have a resolved codes file, use it instead of processing YAML files - if resolved_codes_file and os.path.exists(resolved_codes_file): - try: - with open(resolved_codes_file, encoding="utf-8") as f: - resolved_codes = json.load(f) - - for code in resolved_codes: - num = code.get("num") - name = code.get("name") - if num is not None and name is not None: - sanitized_name = ( - name.lower() - .replace(" ", "_") - .replace("/", "_") - .replace("-", "_") - ) - exception_codes.append((num, sanitized_name)) - - logging.info( - f"Loaded {len(exception_codes)} pre-resolved exception codes from {resolved_codes_file}" - ) - - # Sort by exception code number and deduplicate - seen_nums = set() - unique_codes = [] - for num, name in sorted(exception_codes, key=lambda x: x[0]): - if num not in seen_nums: - seen_nums.add(num) - unique_codes.append((num, name)) - - return unique_codes - - except Exception as e: - logging.error( - f"Error loading resolved codes file {resolved_codes_file}: {e}" - ) - # Fall back to processing YAML files - - for dirpath, _, filenames in os.walk(ext_dir): - for fname in filenames: - if not fname.endswith(".yaml"): - continue - - found_files += 1 - path = os.path.join(dirpath, fname) - - try: - with open(path, encoding="utf-8") as f: - data = yaml.safe_load(f) - - if not isinstance(data, dict) or data.get("kind") != "extension": - continue - - found_extensions += 1 - ext_name = data.get("name", "unnamed") - - # Skip extension filtering if include_all is True - if not include_all: - # Filter by extension requirements - definedBy = data.get("definedBy") - if definedBy: - meets_req = parse_extension_requirements(definedBy) - if not meets_req(enabled_extensions): - continue - - # Check if excluded - excludedBy = data.get("excludedBy") - if excludedBy: - exclusion_check = parse_extension_requirements(excludedBy) - if exclusion_check(enabled_extensions): - continue - - # Get exception codes - for code in data.get("exception_codes", []): - num = code.get("num") - name = code.get("name") - - if num is not None and name is not None: - sanitized_name = ( - name.lower() - .replace(" ", "_") - .replace("/", "_") - .replace("-", "_") - ) - exception_codes.append((num, sanitized_name)) - - except Exception as e: - logging.error(f"Error processing file {path}: {e}") - - if found_extensions > 0: - logging.info( - f"Found {found_extensions} extension definitions in {found_files} files" - ) - logging.info(f"Added {len(exception_codes)} exception codes to the output") - else: - logging.warning(f"No extension definitions found in {ext_dir}") - - # Sort by exception code number and deduplicate - seen_nums = set() - unique_codes = [] - for num, name in sorted(exception_codes, key=lambda x: x[0]): - if num not in seen_nums: - seen_nums.add(num) - unique_codes.append((num, name)) - - return unique_codes - - def extract_instruction_fields(instructions): """Extract field names and their positions from instruction definitions.""" field_dict = {} diff --git a/backends/generators/generator.py b/backends/generators/generator.py index 13d29ae819..cfd88e8ce9 100755 --- a/backends/generators/generator.py +++ b/backends/generators/generator.py @@ -3,6 +3,7 @@ import yaml import logging import pprint +import json pp = pprint.PrettyPrinter(indent=2) logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") @@ -326,14 +327,15 @@ def load_instructions( # Process RV64 encoding rv64_match = rv64_encoding.get("match") + rv32_match = rv32_encoding.get("match") + if rv64_match: instr_dict[name] = { "match": rv64_match } # RV64 gets the default name - # Process RV32 encoding with a _rv32 suffix - rv32_match = rv32_encoding.get("match") - if rv32_match: + if rv32_match and rv32_match != rv64_match: + # Process RV32 encoding with a _rv32 suffix instr_dict[f"{name}_rv32"] = {"match": rv32_match} continue # Skip the rest of the loop as we've already added the encodings @@ -491,11 +493,7 @@ def load_csrs(csr_root, enabled_extensions, include_all=False, target_arch="RV64 else: addr_int = int(addr_to_use, 0) - # For BOTH architecture, add suffix to RV32-specific CSRs - if target_arch == "BOTH" and base == 32: - csrs[addr_int] = f"{name.upper()}.RV32" - else: - csrs[addr_int] = name.upper() + csrs[addr_int] = name.upper() except Exception as e: logging.error(f"Error parsing address {addr_to_use} in {path}: {e}") address_errors += 1 @@ -518,6 +516,124 @@ def load_csrs(csr_root, enabled_extensions, include_all=False, target_arch="RV64 return csrs +def load_exception_codes( + ext_dir, enabled_extensions=None, include_all=False, resolved_codes_file=None +): + """Load exception codes from extension YAML files or pre-resolved JSON file.""" + exception_codes = [] + found_extensions = 0 + found_files = 0 + + if enabled_extensions is None: + enabled_extensions = [] + # If we have a resolved codes file, use it instead of processing YAML files + if resolved_codes_file and os.path.exists(resolved_codes_file): + try: + with open(resolved_codes_file, encoding="utf-8") as f: + resolved_codes = json.load(f) + + for code in resolved_codes: + num = code.get("num") + name = code.get("name") + if num is not None and name is not None: + sanitized_name = ( + name.lower() + .replace(" ", "_") + .replace("/", "_") + .replace("-", "_") + ) + exception_codes.append((num, sanitized_name)) + + logging.info( + f"Loaded {len(exception_codes)} pre-resolved exception codes from {resolved_codes_file}" + ) + + # Sort by exception code number and deduplicate + seen_nums = set() + unique_codes = [] + for num, name in sorted(exception_codes, key=lambda x: x[0]): + if num not in seen_nums: + seen_nums.add(num) + unique_codes.append((num, name)) + + return unique_codes + + except Exception as e: + logging.error( + f"Error loading resolved codes file {resolved_codes_file}: {e}" + ) + # Fall back to processing YAML files + + for dirpath, _, filenames in os.walk(ext_dir): + for fname in filenames: + if not fname.endswith(".yaml"): + continue + + found_files += 1 + path = os.path.join(dirpath, fname) + + try: + with open(path, encoding="utf-8") as f: + data = yaml.safe_load(f) + + if not isinstance(data, dict) or data.get("kind") != "extension": + continue + + found_extensions += 1 + ext_name = data.get("name", "unnamed") + + # Skip extension filtering if include_all is True + if not include_all: + # Filter by extension requirements + definedBy = data.get("definedBy") + if definedBy: + meets_req = parse_extension_requirements(definedBy) + if not meets_req(enabled_extensions): + continue + + # Check if excluded + excludedBy = data.get("excludedBy") + if excludedBy: + exclusion_check = parse_extension_requirements(excludedBy) + if exclusion_check(enabled_extensions): + continue + + # Get exception codes + for code in data.get("exception_codes", []): + num = code.get("num") + name = code.get("name") + + if num is not None and name is not None: + sanitized_name = ( + name.lower() + .replace(" ", "_") + .replace("/", "_") + .replace("-", "_") + ) + exception_codes.append((num, sanitized_name)) + + except Exception as e: + logging.error(f"Error processing file {path}: {e}") + + if found_extensions > 0: + logging.info( + f"Found {found_extensions} extension definitions in {found_files} files" + ) + logging.info(f"Added {len(exception_codes)} exception codes to the output") + else: + logging.warning(f"No extension definitions found in {ext_dir}") + + # Sort by exception code number and deduplicate + seen_nums = set() + unique_codes = [] + for num, name in sorted(exception_codes, key=lambda x: x[0]): + if num not in seen_nums: + seen_nums.add(num) + unique_codes.append((num, name)) + + return unique_codes + + def parse_match(match_str): """ Convert the bit pattern string to an integer. diff --git a/backends/generators/sverilog/sverilog_generator.py b/backends/generators/sverilog/sverilog_generator.py index cf0b8f4744..282b0224b3 100644 --- a/backends/generators/sverilog/sverilog_generator.py +++ b/backends/generators/sverilog/sverilog_generator.py @@ -9,7 +9,7 @@ # Add parent directory to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from generator import load_instructions, load_csrs +from generator import load_instructions, load_csrs, load_exception_codes def format_instruction_name(name): @@ -23,11 +23,15 @@ def format_csr_name(name): return "CSR_" + name.replace(".", "_").upper() +def format_cause_name(name): + """Format cause name for SystemVerilog (uppercase with underscores).""" + return "CAUSE_" + name.replace(".", "_").upper() + + def match_to_sverilog_bits(match_str): """Convert a match string to SystemVerilog bit pattern.""" if not match_str: logging.error(f"Empty match string encountered.") - # return "32'b" + "?" * 32 # For compressed instructions (16-bit), we need to handle them differently. # The 16-bit pattern is in the lower 16 bits, @@ -35,10 +39,8 @@ def match_to_sverilog_bits(match_str): if len(match_str) == 16: # Pad with wildcards on the left for 16-bit instructions match_str = "?" * 16 + match_str - elif len(match_str) < 32: - # For other cases, pad on the right + elif len(match_str) != 32: logging.error(f"Match string length is {len(match_str)}, expected 32 or 16.") - # match_str = match_str + "-" * (32 - len(match_str)) # Convert to SystemVerilog format (0, 1, or ?) result = match_str.replace("-", "?") @@ -46,12 +48,12 @@ def match_to_sverilog_bits(match_str): return "32'b" + "".join(result) -def generate_sverilog(instructions, csrs, output_file): +def generate_sverilog(instructions, csrs, causes, output_file): """Generate SystemVerilog package file.""" with open(output_file, "w") as f: # Write header f.write("\n/* Automatically generated by UDB */\n") - f.write("package riscv_instr;\n") + f.write(f"package {Path(output_file).stem};\n") # Find the maximum name length for alignment max_instr_len = max( @@ -61,6 +63,9 @@ def generate_sverilog(instructions, csrs, output_file): max_csr_len = max( (len(format_csr_name(csrs[addr])) for addr in csrs.keys()), default=0 ) + max_cause_len = max( + (len(format_cause_name(name)) for _, name in causes), default=0 + ) max_len = max(max_instr_len, max_csr_len) # Write instruction parameters @@ -69,17 +74,11 @@ def generate_sverilog(instructions, csrs, output_file): sv_name = format_instruction_name(name) # Pad the name for alignment padded_name = sv_name.ljust(max_len) - # if not name.startswith("v"): - # logging.info(f"Processing instruction: {name}") - # logging.info(f"Formatted instruction: {sv_name}") - # logging.info(f"Padded instruction: {padded_name}") - # Get the match pattern if isinstance(encoding, dict) and "match" in encoding: match = encoding["match"] else: - # If no match field, use all wildcards - logging.warning(f"No match field for instruction {name}.") + logging.error(f"No match field for instruction {name}.") sv_bits = match_to_sverilog_bits(match) f.write(f" localparam logic [31:0] {padded_name} = {sv_bits};\n") @@ -95,6 +94,13 @@ def generate_sverilog(instructions, csrs, output_file): # Format CSR address as 12-bit hex f.write(f" localparam logic [11:0] {padded_name} = 12'h{addr:03x};\n") + # Write exception code parameters + for num, name in causes: + sv_name = format_cause_name(name) + # Pad the name for alignment + padded_name = sv_name.ljust(max_cause_len) + f.write(f" localparam logic [5:0] {padded_name} = 6'h{num:02x};\n") + # Write footer f.write("\nendpackage\n") @@ -113,29 +119,40 @@ def parse_args(): default="../../../gen/resolved_spec/_/csr/", help="Directory containing CSR YAML files", ) + parser.add_argument( + "--ext-dir", + default="../../../arch/ext/", + help="Directory containing extension YAML files", + ) parser.add_argument( "--output", default="riscv_decode_package.svh", help="Output SystemVerilog file name", ) + parser.add_argument( + "--include-all", + action="store_true", + help="Include all instructions and CSRs regardless of extensions", + ) + parser.add_argument( + "--verbose", "-v", action="store_true", help="Enable verbose logging" + ) parser.add_argument( "--extensions", - default="A,D,F,I,M,Q,Zba,Zbb,Zbs,S,System,V,Zicsr,Smpmp,Sm,H,U,Zicntr,Zihpm,Smhpm", - help="Comma-separated list of enabled extensions. Default includes standard extensions.", + "-e", + nargs="+", + default=[], + help="Comma-separated list of enabled extensions.", ) parser.add_argument( "--arch", - default="RV64", + default="BOTH", choices=["RV32", "RV64", "BOTH"], help="Target architecture (RV32, RV64, or BOTH). Default is RV64.", ) parser.add_argument( - "--verbose", "-v", action="store_true", help="Enable verbose logging" - ) - parser.add_argument( - "--include-all", - action="store_true", - help="Include all instructions and CSRs regardless of extensions", + "--resolved-codes", + help="JSON file containing pre-resolved exception codes", ) return parser.parse_args() @@ -167,8 +184,17 @@ def main(): csrs = load_csrs(args.csr_dir, enabled_extensions, args.include_all, args.arch) logging.info(f"Loaded {len(csrs)} CSRs") + # Load exception codes + causes = load_exception_codes( + args.ext_dir, + args.extensions, + include_all=args.include_all, + resolved_codes_file=args.resolved_codes, + ) + logging.info(f"Loaded {len(causes)} exception codes") + # Generate the SystemVerilog file - generate_sverilog(instructions, csrs, args.output) + generate_sverilog(instructions, csrs, causes, args.output) logging.info( f"Generated {args.output} with {len(instructions)} instructions and {len(csrs)} CSRs" ) diff --git a/backends/generators/tasks.rake b/backends/generators/tasks.rake index 8f38d1c32f..5d84e5aeac 100644 --- a/backends/generators/tasks.rake +++ b/backends/generators/tasks.rake @@ -8,6 +8,41 @@ directory "#{$root}/gen/go" directory "#{$root}/gen/c_header" directory "#{$root}/gen/sverilog" +def with_resolved_exception_codes(cfg_arch) + # Process ERB templates in exception codes using Ruby ERB processing + resolved_exception_codes = [] + + # Collect all exception codes from extensions and resolve ERB templates + cfg_arch.extensions.each do |ext| + ext.exception_codes.each do |ecode| + # Use Ruby's ERB processing to resolve templates in exception names + resolved_name = cfg_arch.render_erb( + ecode.name, + "exception code name: #{ecode.name}" + ) + + resolved_exception_codes << { + "num" => ecode.num, + "name" => resolved_name, + "var" => ecode.var, + "ext" => ext.name + } + end + end + + # Write resolved exception codes to a temporary JSON file + tempfile = Tempfile.new(["resolved_exception_codes", ".json"]) + tempfile.write(JSON.pretty_generate(resolved_exception_codes)) + tempfile.flush + + begin + yield tempfile.path # Run the generator script + ensure + tempfile.close + tempfile.unlink + end +end + namespace :gen do desc <<~DESC Generate Go code from RISC-V instruction and CSR definitions @@ -56,36 +91,11 @@ namespace :gen do csr_dir = cfg_arch.path / "csr" ext_dir = cfg_arch.path / "ext" - # Process ERB templates in exception codes using Ruby ERB processing - resolved_exception_codes = [] - - # Collect all exception codes from extensions and resolve ERB templates - cfg_arch.extensions.each do |ext| - ext.exception_codes.each do |ecode| - # Use Ruby's ERB processing to resolve templates in exception names - resolved_name = cfg_arch.render_erb(ecode.name, "exception code name: #{ecode.name}") - - resolved_exception_codes << { - 'num' => ecode.num, - 'name' => resolved_name, - 'var' => ecode.var, - 'ext' => ext.name - } - end - end - - # Write resolved exception codes to a temporary JSON file - resolved_codes_file = Tempfile.new(['resolved_exception_codes', '.json']) - resolved_codes_file.write(JSON.pretty_generate(resolved_exception_codes)) - resolved_codes_file.flush - - begin - # Run the C header generator script using the same Python environment - # The script generates encoding.h for inclusion in C programs - sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/c_header/generate_encoding.py --inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --ext-dir=#{ext_dir} --resolved-codes=#{resolved_codes_file.path} --output=#{output_dir}encoding.out.h --include-all" - ensure - resolved_codes_file.close - resolved_codes_file.unlink + with_resolved_exception_codes(cfg_arch) do |resolved_codes| + sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/c_header/generate_encoding.py " \ + "--inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --ext-dir=#{ext_dir} " \ + "--resolved-codes=#{resolved_codes} " \ + "--output=#{output_dir}encoding.out.h --include-all" end end @@ -108,8 +118,13 @@ namespace :gen do cfg_arch = resolver.cfg_arch_for(config_name) inst_dir = cfg_arch.path / "inst" csr_dir = cfg_arch.path / "csr" + ext_dir = cfg_arch.path / "ext" - # Run the SystemVerilog generator script using the same Python environment - sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/sverilog/sverilog_generator.py --inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --output=#{output_dir}riscv_decode_package.svh" + with_resolved_exception_codes(cfg_arch) do |resolved_codes| + sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/sverilog/sverilog_generator.py " \ + "--inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --ext-dir=#{ext_dir} " \ + "--resolved-codes=#{resolved_codes} " \ + "--output=#{output_dir}riscv_decode_package.svh --include-all" + end end end From 0ba1fb98769aba9a86e415aa62618497e0b73bc7 Mon Sep 17 00:00:00 2001 From: Alieldin Alaa <147806061+AliAlaa88@users.noreply.github.com> Date: Tue, 14 Oct 2025 17:07:24 +0300 Subject: [PATCH 4/4] Update backends/generators/sverilog/sverilog_generator.py Co-authored-by: Jordan Carlin Signed-off-by: Alieldin Alaa <147806061+AliAlaa88@users.noreply.github.com> --- backends/generators/sverilog/sverilog_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/generators/sverilog/sverilog_generator.py b/backends/generators/sverilog/sverilog_generator.py index 282b0224b3..8dbe0ad08e 100644 --- a/backends/generators/sverilog/sverilog_generator.py +++ b/backends/generators/sverilog/sverilog_generator.py @@ -52,7 +52,7 @@ def generate_sverilog(instructions, csrs, causes, output_file): """Generate SystemVerilog package file.""" with open(output_file, "w") as f: # Write header - f.write("\n/* Automatically generated by UDB */\n") + f.write("/* Automatically generated by UDB */\n") f.write(f"package {Path(output_file).stem};\n") # Find the maximum name length for alignment