Update utils/block_relation_builder.py
Browse files- utils/block_relation_builder.py +282 -6
utils/block_relation_builder.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import json
|
| 2 |
import copy
|
| 3 |
import re
|
| 4 |
-
from collections import defaultdict
|
| 5 |
import secrets
|
| 6 |
import string
|
| 7 |
from typing import Dict, Any, TypedDict,Tuple
|
|
@@ -2512,8 +2512,8 @@ def process_scratch_blocks(all_generated_blocks, generated_output_json):
|
|
| 2512 |
|
| 2513 |
# Initialize dictionaries to store and reuse generated unique IDs
|
| 2514 |
# This prevents creating multiple unique IDs for the same variable/broadcast across different blocks
|
| 2515 |
-
variable_id_map = defaultdict(lambda: generate_secure_token(
|
| 2516 |
-
broadcast_id_map = defaultdict(lambda: generate_secure_token(
|
| 2517 |
|
| 2518 |
# Define the mapping for input field names to their required integer types for shadows
|
| 2519 |
input_type_mapping = {
|
|
@@ -2602,7 +2602,7 @@ def process_scratch_blocks(all_generated_blocks, generated_output_json):
|
|
| 2602 |
else:
|
| 2603 |
# Fallback: try original generated_output_json value if present, else synthesize
|
| 2604 |
fallback = gen_block_data.get("inputs", {}).get(input_name,
|
| 2605 |
-
[1, [11, "message1", generate_secure_token(
|
| 2606 |
processed_block["inputs"][input_name] = fallback
|
| 2607 |
continue
|
| 2608 |
|
|
@@ -3065,6 +3065,281 @@ def variable_adder_main(project_data):
|
|
| 3065 |
return processed_json
|
| 3066 |
except Exception as e:
|
| 3067 |
print(f"Error error in the variable initialization opcodes: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3068 |
#################################################################################################################################################################
|
| 3069 |
#--------------------------------------------------[Helper main function]----------------------------------------------------------------------------------------
|
| 3070 |
#################################################################################################################################################################
|
|
@@ -3114,11 +3389,12 @@ when I receive [Game Start v]
|
|
| 3114 |
end
|
| 3115 |
"""
|
| 3116 |
# print(pseudo_code)
|
| 3117 |
-
#
|
|
|
|
| 3118 |
# all_generated_blocks = generate_plan(generated_output_json, initial_opcode_occurrences, pseudo_code)
|
| 3119 |
# processed_blocks= process_scratch_blocks(all_generated_blocks, generated_output_json)
|
| 3120 |
# renamed_blocks, renamed_counts = rename_blocks(processed_blocks, initial_opcode_occurrences)
|
| 3121 |
-
#
|
| 3122 |
# print("--------------\n\n")
|
| 3123 |
# print(processed_blocks)
|
| 3124 |
# print("--------------\n\n")
|
|
|
|
| 1 |
import json
|
| 2 |
import copy
|
| 3 |
import re
|
| 4 |
+
from collections import defaultdict, Counter
|
| 5 |
import secrets
|
| 6 |
import string
|
| 7 |
from typing import Dict, Any, TypedDict,Tuple
|
|
|
|
| 2512 |
|
| 2513 |
# Initialize dictionaries to store and reuse generated unique IDs
|
| 2514 |
# This prevents creating multiple unique IDs for the same variable/broadcast across different blocks
|
| 2515 |
+
variable_id_map = defaultdict(lambda: generate_secure_token())
|
| 2516 |
+
broadcast_id_map = defaultdict(lambda: generate_secure_token())
|
| 2517 |
|
| 2518 |
# Define the mapping for input field names to their required integer types for shadows
|
| 2519 |
input_type_mapping = {
|
|
|
|
| 2602 |
else:
|
| 2603 |
# Fallback: try original generated_output_json value if present, else synthesize
|
| 2604 |
fallback = gen_block_data.get("inputs", {}).get(input_name,
|
| 2605 |
+
[1, [11, "message1", generate_secure_token()]])
|
| 2606 |
processed_block["inputs"][input_name] = fallback
|
| 2607 |
continue
|
| 2608 |
|
|
|
|
| 3065 |
return processed_json
|
| 3066 |
except Exception as e:
|
| 3067 |
print(f"Error error in the variable initialization opcodes: {e}")
|
| 3068 |
+
|
| 3069 |
+
#################################################################################################################################################################
|
| 3070 |
+
#--------------------------------------------------[Helper function to generate Opcode]--------------------------------------------------------------------------
|
| 3071 |
+
#################################################################################################################################################################
|
| 3072 |
+
|
| 3073 |
+
def _find_all_opcodes(code_block: str) -> list[str]:
|
| 3074 |
+
"""
|
| 3075 |
+
Finds all Scratch opcodes in a given code block using a series of
|
| 3076 |
+
regex patterns. This function is designed to handle multi-line blocks
|
| 3077 |
+
by processing the entire code block and finding all matches. The
|
| 3078 |
+
patterns are ordered from most specific to least specific to prevent
|
| 3079 |
+
misclassification.
|
| 3080 |
+
|
| 3081 |
+
Args:
|
| 3082 |
+
code_block: A string containing the entire pseudo-code.
|
| 3083 |
+
|
| 3084 |
+
Returns:
|
| 3085 |
+
A list of all detected opcode strings.
|
| 3086 |
+
"""
|
| 3087 |
+
opcodes = []
|
| 3088 |
+
|
| 3089 |
+
# Define a list of regex patterns and their corresponding opcodes,
|
| 3090 |
+
# ordered from most specific to least specific. The re.DOTALL flag
|
| 3091 |
+
# allows '.' to match newlines, which is crucial for multi-line blocks.
|
| 3092 |
+
patterns = [
|
| 3093 |
+
# --- Multi-line Control Blocks (most specific, non-greedy) ---
|
| 3094 |
+
(r"if <.+?> then(?:.|\n)+?else(?:.|\n)+?end", "control_if_else"), #(to test muliple stack)
|
| 3095 |
+
(r"forever", "control_forever"),
|
| 3096 |
+
(r"if <.+?> then", "control_if"),
|
| 3097 |
+
(r"repeat until <.+?>", "control_repeat_until"),
|
| 3098 |
+
(r"repeat\s+(?:\(.+?\)|\[.+?(?:\s+v)?\]|\S+)", "control_repeat"),
|
| 3099 |
+
(r"stop\s+(?:all|this script|other scripts in sprite|\[(?:all|this script|other scripts in sprite)(?:\s+v)?\])(?!\s+sounds)", "control_stop"),
|
| 3100 |
+
(r"when I start as a clone", "control_start_as_clone"),
|
| 3101 |
+
(r"create clone of \[.+?(?:\s+v)?\]", "control_create_clone_of"),
|
| 3102 |
+
(r"delete this clone", "control_delete_this_clone"),
|
| 3103 |
+
(r"wait\s+(?:\(.+?\)|\[.+?(?:\s+v)?\]|\S+)\s+sec(?:ond)?s?", "control_wait"),
|
| 3104 |
+
(r"wait until <.+?>", "control_wait_until"),
|
| 3105 |
+
|
| 3106 |
+
# --- Event Blocks (most specific) ---
|
| 3107 |
+
(r"when green flag clicked", "event_whenflagclicked"),
|
| 3108 |
+
(r"when\s+(?:key\s+\[(.+?)(?:\s+v)?\]|\[(.+?)(?:\s+v)?\]\s+key)\s+pressed", "event_whenkeypressed"),
|
| 3109 |
+
(r"when this sprite clicked", "event_whenthisspriteclicked"),
|
| 3110 |
+
(r"when backdrop switches to \[.+?(?:\s+v)?\]", "event_whenbackdropswitchesto"),
|
| 3111 |
+
(r"when I receive \[.+?(?:\s+v)?\]", "event_whenbroadcastreceived"),
|
| 3112 |
+
(r"when \[.+?(?:\s+v)?\] > (.+)", "event_whengreaterthan"),
|
| 3113 |
+
(r"broadcast \[.+?(?:\s+v)?\] and wait", "event_broadcastandwait"),
|
| 3114 |
+
(r"broadcast \[.+?(?:\s+v)?\]", "event_broadcast"),
|
| 3115 |
+
|
| 3116 |
+
# --- Data Blocks (Variables and Lists) - specific block types first ---
|
| 3117 |
+
(r"set\s*\[\s*.+?(?:\s+v)?\s*\]\s*to\s*\(?\s*.+?\s*\)?", "data_setvariableto"),
|
| 3118 |
+
(r"change\s*\[\s*.+?(?:\s+v)?\s*\]\s*by\s*\(?\s*.+?\s*\)?", "data_changevariableby"),
|
| 3119 |
+
(r"show variable \[.+?(?:\s+v)?\]", "data_showvariable"),
|
| 3120 |
+
(r"hide variable \[.+?(?:\s+v)?\]", "data_hidevariable"),
|
| 3121 |
+
(r"show list \[.+?(?:\s+v)?\]", "data_showlist"),
|
| 3122 |
+
(r"hide list \[.+?(?:\s+v)?\]", "data_hidelist"),
|
| 3123 |
+
(r"add\s+(?:\[.+?\]|\(.+?\)|\w+)\s+to\s+\[.+?(?:\s+v)?\]", "data_addtolist"),
|
| 3124 |
+
(r"delete\s*\((?!all\)).+?\)\s*of\s*\[.+?(?:\s+v)?\]", "data_deleteoflist"),
|
| 3125 |
+
(r"delete\s*\(all\)\s*of\s*\[.+?(?:\s+v)?\]", "data_deletealloflist"),
|
| 3126 |
+
(r"insert\s+(\(.+?\)|\[.+?\]|\(\[.+?\]\)|[^\s]+)\s+at\s+(\(.+?\)|\[.+?\]|\(\[.+?\]\)|\d+)\s+of\s+\[.+?(?:\s+v)?\]", "data_insertatlist"),
|
| 3127 |
+
(r"replace\s+item\s+(\(.+?\)|\[\s*.+?\s*(?:v)?\]|[^\s]+)\s+of\s+\[.+?(?:\s+v)?\]\s+with\s+(\(.+?\)|\[\s*.+?\s*(?:v)?\]|.+)","data_replaceitemoflist"),
|
| 3128 |
+
(r"[<(]\s*\[[^\]]+?\s+v\]\s*contains\s*\[[^\]]+?\]\s*\??\s*[)>]", "data_listcontainsitem"),
|
| 3129 |
+
(r"\(item\s+#\s+of\s+\(?(.+?)\)?\s+in\s+\[.+?(?:\s+v)?\]\)", "data_itemnumoflist"),
|
| 3130 |
+
(r"(?<!replace\s)\(?item(?!\s+#)\s+(\(.+?\)|\[\s*.+?\s*(?:v)?\]|[^\s]+)\s+of\s+\[.+?(?:\s+v)?\]\)?", "data_itemoflist"),
|
| 3131 |
+
(r"\(length of \[.+?(?:\s+v)?\]\)", "data_lengthoflist"),
|
| 3132 |
+
|
| 3133 |
+
# --- Sensing Blocks ---
|
| 3134 |
+
(r"ask \[.+?\] and wait", "sensing_askandwait"),
|
| 3135 |
+
(r"(?<!when\s)key\s+\[.+?(?:\s+v)?\]\s+pressed\??", "sensing_keypressed"),
|
| 3136 |
+
(r"mouse down\??", "sensing_mousedown"),
|
| 3137 |
+
(r"""\s*<?\s*color\s*\[?\s*(#[0-9A-Fa-f]{6})\s*\]?\s*is\s+touching\s*\[?\s*(#[0-9A-Fa-f]{6})\s*\]?\s*\??\s*>?\s*""", "sensing_coloristouchingcolor"),
|
| 3138 |
+
(r"(?<!is\s)touching\s*(?:color\s*)?\[?\s*#([0-9A-Fa-f]{6})\s*\]?\??", "sensing_touchingcolor"),
|
| 3139 |
+
(r"touching \[.+? v\]\??", "sensing_touchingobject"),
|
| 3140 |
+
(r"set drag mode \[.+? v\]", "sensing_setdragmode"),
|
| 3141 |
+
(r"reset timer", "sensing_resettimer"),
|
| 3142 |
+
(r"(?i)(?<!\bat\s)(?<!\bdelete\s)(?<!\binsert\s)(?<!\breplace\s)(?<!\bcreate\s)(?<!\bitem\s)(?<!\bletter\s)"r"(?:"r"\(\s*(?!(?:item\b|\d+|\#|length\b|insert\b|delete\b|replace\b|create\b|letter\b))[^()]{1,200}?\)\s*of\s*\[[^\]]+?(?:\s+v)?\](?!\s*in\s*\[)"r"|"r"\(\s*(?!(?:item\b|\d+|\#|length\b|insert\b|delete\b|replace\b|create\b|letter\b)).*?of\s*\[[^\]]+?(?:\s+v)?\].*?\)"r"|"r"\[\s*(?!(?:item\b|\d+|\#|length\b|letter\b))[^\]]+?\]\s*of\s*\[[^\]]+?(?:\s+v)?\](?!\s*in\s*\[)"r"|"r"\b(?:backdrop|costume|x\s+position|y\s+position|direction|size|volume|loudness|answer|day|month|year|username|timer|mouse\s+x|mouse\s+y)\b\s*of\s*\[[^\]]+?(?:\s+v)?\](?!\s*in\s*\[)"r")","sensing_of"),
|
| 3143 |
+
(r"\(current \[.+? v]\)", "sensing_current"),
|
| 3144 |
+
(r"\(?answer\)?", "sensing_answer"), #(to test muliple bracket and alone should treet as the keyword)
|
| 3145 |
+
(r"\(?username\)?", "sensing_username"), #(to test muliple bracket and alone should treet as the keyword)
|
| 3146 |
+
|
| 3147 |
+
# --- Sound Blocks ---
|
| 3148 |
+
(r"play sound \[.+? v\] until done", "sound_playuntildone"),
|
| 3149 |
+
(r"start sound \[.+? v\]", "sound_play"),
|
| 3150 |
+
(r"stop all sounds", "sound_stopallsounds"),
|
| 3151 |
+
(r"change volume by\s*(?:\((.+?)\)|\[(.+?)\]|(.+))", "sound_changevolumeby"),
|
| 3152 |
+
(r"""set\ volume\ to\s+\(?\s*(?:-?\d+(?:\.\d+)?|\[?[a-zA-Z_][\w\s]*\]?(?:\ v)?)\s*\)?\s*%?""", "sound_setvolumeto"),
|
| 3153 |
+
|
| 3154 |
+
(r"\(volume\)", "sound_volume"),
|
| 3155 |
+
|
| 3156 |
+
# --- Motion Blocks ---
|
| 3157 |
+
(r"go to x:\s*\(?(.+?)\)?\s*y:\s*\(?(.+?)\)?", "motion_gotoxy"),
|
| 3158 |
+
(r"set x to (.+)", "motion_setx"),
|
| 3159 |
+
(r"set y to (.+)", "motion_sety"),
|
| 3160 |
+
(r"move\s*\(?(.+?)\)?\s*(?:steps?)?", "motion_movesteps"),
|
| 3161 |
+
(r"turn right\s*\(?(.+?)\)?\s*(?:degrees?)?", "motion_turnright"),
|
| 3162 |
+
(r"turn left\s*\(?(.+?)\)?\s*(?:degrees?)?", "motion_turnleft"),
|
| 3163 |
+
(r"go to\s*(?:random position|mouse-pointer|\[.*?\]|.+)", "motion_goto"), #(to mouse-pointer is not include here for now)
|
| 3164 |
+
(r"point in direction\s*\(?(.+?)\)?", "motion_pointindirection"),
|
| 3165 |
+
(r"point towards \[.+? v\]", "motion_pointtowards"),
|
| 3166 |
+
(r"change x by\s*\(?(.+?)\)?", "motion_changexby"),
|
| 3167 |
+
(r"change y by\s*\(?(.+?)\)?", "motion_changeyby"),
|
| 3168 |
+
(r"glide\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)\s*to\s*x:\s*\(?(.+?)\)?\s*y:\s*\(?(.+?)\)?", "motion_glidesecstoxy"),
|
| 3169 |
+
(r"glide\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)\s*to\s*\[.*?\]", "motion_glideto"),
|
| 3170 |
+
(r"if on edge, bounce", "motion_ifonedgebounce"),
|
| 3171 |
+
(r"set rotation style\s*\[(?:left-right|all around|don't rotate)(?:\s*v)?\]", "motion_setrotationstyle"),
|
| 3172 |
+
(r"\(?x position\)?", "motion_xposition"), #(to x positon may detect where var is used)
|
| 3173 |
+
(r"\(?y position\)?", "motion_yposition"), #(to y position may detect where var is used)
|
| 3174 |
+
(r"\(?direction\)?", "motion_direction"), #(to direction may detect where var is used)
|
| 3175 |
+
|
| 3176 |
+
# --- Looks Blocks ---
|
| 3177 |
+
(r"switch costume to \[.+? v\]", "looks_switchcostumeto"),
|
| 3178 |
+
(r"next costume", "looks_nextcostume"),
|
| 3179 |
+
(r"switch backdrop to \[.+? v\] and wait", "looks_switchbackdroptowait"),
|
| 3180 |
+
(r"switch backdrop to \[.+? v\]", "looks_switchbackdropto"),
|
| 3181 |
+
(r"next backdrop", "looks_nextbackdrop"),
|
| 3182 |
+
(r"^\s*show\s*$", "looks_show"),
|
| 3183 |
+
(r"^\s*hide\s*$", "looks_hide"),
|
| 3184 |
+
(r"say\s+(?:\[.+?\]|\(.+?\)|.+?)\s*for\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)", "looks_sayforsecs"),
|
| 3185 |
+
(r"say\s+(?!.*\bfor\b\s*\(?\d+\)?\s*(?:sec|secs|second|seconds))(?:\[.+?\]|\(.+?\)|.+?)", "looks_say"),
|
| 3186 |
+
(r"think\s*\[.+?\]\s*for\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)", "looks_thinkforsecs"),
|
| 3187 |
+
(r"think\s*\[.+?\]", "looks_think"),
|
| 3188 |
+
(r"change size by\s*\(?(.+?)\)?", "looks_changesizeby"),
|
| 3189 |
+
(r"set size to\s*\(?(.+?)\)?\s*%?", "looks_setsizeto"),
|
| 3190 |
+
(r"change\s*\[(.+?)(?:\s*v)?\]\s*effect by\s*\(?(.+?)\)?", "looks_changeeffectby"),
|
| 3191 |
+
(r"set\s*\[(.+?)(?:\s*v)?\]\s*effect to\s*\(?(.+?)\)?", "looks_seteffectto"),
|
| 3192 |
+
(r"clear graphic effects", "looks_cleargraphiceffects"),
|
| 3193 |
+
(r"\(costume \[.+? v\]\)", "looks_costumenumbername"),
|
| 3194 |
+
(r"\(backdrop \[.+? v\]\)", "looks_backdropnumbername"),
|
| 3195 |
+
|
| 3196 |
+
# --- Operators ---
|
| 3197 |
+
(r"<\s*[^<>?]+\s*<\s*[^<>?]+\s*>", "operator_lt"),
|
| 3198 |
+
(r"<\s*[^<>?]+\s*=\s*[^<>?]+\s*>", "operator_equals"),
|
| 3199 |
+
(r"<\s*[^<>?]+\s*>\s*[^<>?]+\s*>", "operator_gt"),
|
| 3200 |
+
(r"<\s*.*?\s+and\s+.*?\s*>", "operator_and"),
|
| 3201 |
+
(r"<\s*.*?\s+or\s+.*?\s*>", "operator_or"),
|
| 3202 |
+
(r"<\s*not\s+.*?\s*>", "operator_not"),
|
| 3203 |
+
(r"(?:\(join\s+(.+?)\s+(.+?)\)|join\s+(.+?)\s+(.+?))", "operator_join"),
|
| 3204 |
+
(r"\(\s*.+?\s*\+\s*.+?\s*\)", "operator_add"),
|
| 3205 |
+
(r"\(\s*(?!-\s*\d+(?:\.\d+)?\s*\))(.+?)\s+-\s+(.+?)\)", "operator_subtract"),
|
| 3206 |
+
(r"\(\s*.+?\s*\*\s*.+?\s*\)", "operator_multiply"),
|
| 3207 |
+
(r"\(\s*.+?\s*/\s*.+?\s*\)", "operator_divide"),
|
| 3208 |
+
(r"\(pick random\s+(.+?)\s+to\s+(.+?)\)", "operator_random"),
|
| 3209 |
+
(r"\(letter\s+(.+?)\s+of\s+(.+?)\)", "operator_letterof"),
|
| 3210 |
+
(r"\(length of\s+(.+?)\)", "operator_length"),
|
| 3211 |
+
(r"\(\s*.+?\s+mod\s+.+?\s*\)", "operator_mod"),
|
| 3212 |
+
(r"\(round\s+(.+?)\)", "operator_round"),
|
| 3213 |
+
(r"[<(]\s*\[(?![^\]]*\s+v\])[^\]]+?\]\s*contains\s*\[[^\]]+?\]\s*\??\s*[)>]", "operator_contains"),
|
| 3214 |
+
(r"\(\s*\[?(abs|floor|ceiling|sqrt|sin|cos|tan|asin|acos|atan|ln|log|e \^|10 \^)\s*(?:v)?\]?\s+of\s+.+?\)", "operator_mathop"),
|
| 3215 |
+
|
| 3216 |
+
]
|
| 3217 |
+
|
| 3218 |
+
for pattern, opcode in patterns:
|
| 3219 |
+
for match in re.finditer(pattern, code_block, re.DOTALL):
|
| 3220 |
+
opcodes.append(opcode)
|
| 3221 |
+
|
| 3222 |
+
return opcodes
|
| 3223 |
+
|
| 3224 |
+
def analyze_opcode_counts(pseudo_code: str) -> list[dict]:
    """
    Count how often each opcode is detected in Scratch-like pseudo-code.

    The pseudo-code is handed to ``_find_all_opcodes`` and the opcode
    strings it returns are tallied.

    Args:
        pseudo_code: A string containing the pseudo-code.

    Returns:
        A list of ``{"opcode": <name>, "count": <int>}`` dictionaries, one
        per distinct opcode, ordered alphabetically by opcode name.
    """
    # Counter consumes the detected opcodes directly; no manual increment loop.
    tally = Counter(_find_all_opcodes(pseudo_code))

    # Walk the distinct opcode names in sorted order for a stable output.
    return [{"opcode": name, "count": tally[name]} for name in sorted(tally)]
|
| 3248 |
+
|
| 3249 |
+
#################################################################################################################################################################
|
| 3250 |
+
#--------------------------------------------------[Helper functions to separate and correct the JSON]----------------------------------------------------------
|
| 3251 |
+
#################################################################################################################################################################
|
| 3252 |
+
|
| 3253 |
+
def separate_scripts(pseudocode_string):
    """
    Separates a block of Scratch pseudocode into a list of individual scripts.

    The pseudocode is cut at every "hat" block line listed below; each hat
    block stays at the beginning of the script it starts.

    Edge cases:
      * Text that appears before the first hat block (or the whole input
        when it contains no hat block at all) is returned as its own entry
        if it is not blank.
      * A hat block followed only by whitespace yields an entry holding just
        the hat text; it is never glued to the next hat block.

    Args:
        pseudocode_string (str): A string containing Scratch pseudocode.

    Returns:
        list: A list of strings, where each string is one separated script,
              in the order they occur in the input.
    """
    # Recognised hat blocks. Each is matched on a single line only: '.' must
    # not cross newlines, otherwise "when .*? key pressed" would swallow
    # everything from an earlier "when ..." line up to a later key-press hat.
    hat_patterns = [
        r"when green flag clicked",
        r"when .*? key pressed",
        r"when this sprite clicked",
        r"when backdrop switches to \[.*?\]",
        r"when \[.*?\] > \[.*?\]",
        r"when I receive \[.*?\]"
    ]
    hat_regex = re.compile("|".join(f"(?:{p})" for p in hat_patterns))

    hats = list(hat_regex.finditer(pseudocode_string))
    text_end = len(pseudocode_string)
    scripts = []

    # Anything before the first hat is kept as a stand-alone fragment so it
    # cannot shift the hat/body pairing of the scripts that follow.
    preamble = pseudocode_string[:hats[0].start()] if hats else pseudocode_string
    if preamble.strip():
        scripts.append(preamble)

    for index, hat in enumerate(hats):
        # A script body runs from the end of its hat to the start of the next.
        body_end = hats[index + 1].start() if index + 1 < len(hats) else text_end
        body = pseudocode_string[hat.end():body_end]
        # Whitespace-only bodies are dropped, leaving the bare hat text.
        scripts.append(hat.group(0) + body if body.strip() else hat.group(0))

    return scripts
|
| 3304 |
+
|
| 3305 |
+
def transform_logic_to_action_flow(source_data, description=""):
    """
    Reshape a 'refined_logic' structure into an 'action_overall_flow' structure.

    Args:
        source_data (dict): Must contain a 'refined_logic' entry that itself
                            holds 'name_variable' and 'pseudocode'.
        description (str): Text stored under the "description" key of the output.

    Returns:
        dict: ``{"action_overall_flow": {<name_variable>: {"description": ...,
              "plans": [{"logic": <item>}, ...]}}}`` with one plan per item
              of 'pseudocode', in the same order.

    Raises:
        ValueError: If 'refined_logic', 'name_variable' or 'pseudocode' is missing.
    """
    # Validate in positive form; the checks short-circuit left to right, so the
    # nested lookups only happen once 'refined_logic' is known to be present.
    has_required_keys = (
        "refined_logic" in source_data
        and "name_variable" in source_data["refined_logic"]
        and "pseudocode" in source_data["refined_logic"]
    )
    if not has_required_keys:
        raise ValueError("Input dictionary is missing required keys: 'refined_logic', 'name_variable', or 'pseudocode'.")

    refined = source_data["refined_logic"]
    target_name = refined["name_variable"]

    # Wrap every pseudocode entry in a {"logic": ...} record.
    plans = []
    for logic_block in refined["pseudocode"]:
        plans.append({"logic": logic_block})

    # Assemble the nested result from the inside out.
    target_entry = {"description": description, "plans": plans}
    return {"action_overall_flow": {target_name: target_entry}}
|
| 3342 |
+
|
| 3343 |
#################################################################################################################################################################
|
| 3344 |
#--------------------------------------------------[Helper main function]----------------------------------------------------------------------------------------
|
| 3345 |
#################################################################################################################################################################
|
|
|
|
| 3389 |
end
|
| 3390 |
"""
|
| 3391 |
# print(pseudo_code)
|
| 3392 |
+
# opcode_counts_result = analyze_opcode_counts(pseudo_code)
|
| 3393 |
+
# generated_output_json, initial_opcode_occurrences = generate_blocks_from_opcodes(opcode_counts_result, all_block_definitions)
|
| 3394 |
# all_generated_blocks = generate_plan(generated_output_json, initial_opcode_occurrences, pseudo_code)
|
| 3395 |
# processed_blocks= process_scratch_blocks(all_generated_blocks, generated_output_json)
|
| 3396 |
# renamed_blocks, renamed_counts = rename_blocks(processed_blocks, initial_opcode_occurrences)
|
| 3397 |
+
# print(opcode_counts_result)
|
| 3398 |
# print("--------------\n\n")
|
| 3399 |
# print(processed_blocks)
|
| 3400 |
# print("--------------\n\n")
|