prthm11 committed on
Commit
da1199c
·
verified ·
1 Parent(s): edbcd91

Update utils/block_relation_builder.py

Browse files
Files changed (1) hide show
  1. utils/block_relation_builder.py +42 -49
utils/block_relation_builder.py CHANGED
@@ -3254,8 +3254,9 @@ def separate_scripts(pseudocode_string):
3254
  """
3255
  Separates a block of Scratch pseudocode into a list of individual scripts.
3256
 
3257
- The function splits the pseudocode at specific "hat" block strings
3258
- and keeps the hat block as the beginning of each new script.
 
3259
 
3260
  Args:
3261
  pseudocode_string (str): A string containing Scratch pseudocode.
@@ -3264,42 +3265,34 @@ def separate_scripts(pseudocode_string):
3264
  list: A list of strings, where each string is a complete,
3265
  separated script.
3266
  """
3267
- # Define the "hat" block patterns. The parentheses around each pattern
3268
- # are a "capturing group," which tells re.split() to include the
3269
- # delimiter in the output.
3270
- delimiter_patterns = [
3271
- r"when green flag clicked",
3272
- r"when .*? key pressed",
3273
- r"when this sprite clicked",
3274
- r"when backdrop switches to \[.*?\]",
3275
- r"when \[.*?\] > \[.*?\]",
3276
- r"when I receive \[.*?\]"
3277
- ]
3278
-
3279
- # Join the patterns with a logical OR (|) to create a single regex pattern.
3280
- combined_pattern = "|".join(f"({p})" for p in delimiter_patterns)
3281
-
3282
- # Split the string using the combined pattern. The re.DOTALL flag
3283
- # ensures that the dot (.) matches newline characters as well.
3284
- # The result will be an alternating list of captured delimiters and the text
3285
- # that follows them.
3286
- split_result = re.split(combined_pattern, pseudocode_string, flags=re.DOTALL)
3287
-
3288
- # Filter out empty strings that result from the split
3289
- filtered_list = [item for item in split_result if item and item.strip()]
3290
 
3291
- # Reassemble the scripts by pairing the delimiters with their corresponding
3292
- # code blocks. The split result will look something like:
3293
- # ['delimiter1', 'code block1', 'delimiter2', 'code block2']
3294
- # We want to combine these pairs.
3295
  scripts = []
3296
- # Loop through the list two items at a time
3297
- for i in range(0, len(filtered_list), 2):
3298
- if i + 1 < len(filtered_list):
3299
- scripts.append(filtered_list[i] + filtered_list[i+1])
3300
- else:
3301
- scripts.append(filtered_list[i])
3302
-
 
 
 
 
 
 
 
3303
  return scripts
3304
 
3305
  def transform_logic_to_action_flow(source_data, description=""):
@@ -3388,16 +3381,16 @@ when I receive [Game Start v]
3388
  end
3389
  end
3390
  """
3391
- # print(pseudo_code)
3392
- # opcode_counts_result = analyze_opcode_counts(pseudo_code)
3393
- # generated_output_json, initial_opcode_occurrences = generate_blocks_from_opcodes(opcode_counts_result, all_block_definitions)
3394
- # all_generated_blocks = generate_plan(generated_output_json, initial_opcode_occurrences, pseudo_code)
3395
- # processed_blocks= process_scratch_blocks(all_generated_blocks, generated_output_json)
3396
- # renamed_blocks, renamed_counts = rename_blocks(processed_blocks, initial_opcode_occurrences)
3397
- # print(opcode_counts_result)
3398
- # print("--------------\n\n")
3399
- # print(processed_blocks)
3400
- # print("--------------\n\n")
3401
- # print(initial_opcode_occurrences)
3402
- # print("--------------\n\n")
3403
- # print(renamed_blocks)
 
3254
  """
3255
  Separates a block of Scratch pseudocode into a list of individual scripts.
3256
 
3257
+ This function finds the start of each "hat" block and slices the
3258
+ original string to capture the full code block for each script,
3259
+ providing a more robust and reliable separation.
3260
 
3261
  Args:
3262
  pseudocode_string (str): A string containing Scratch pseudocode.
 
3265
  list: A list of strings, where each string is a complete,
3266
  separated script.
3267
  """
3268
+ # Define the "hat" block patterns with more robust regex.
3269
+ # No capturing group is needed: re.finditer reports each match's position directly.
3270
+ # We use a logical OR (|) to combine them into a single pattern.
3271
+ delimiter_patterns = (
3272
+ r"when green flag clicked|when flag clicked|when \S+ key pressed|"
3273
+ r"when this sprite clicked|when backdrop switches to \[.*?\]|"
3274
+ r"when I receive \[.*?\]|when \[.*?\] > \[.*?\]"
3275
+ )
3276
+
3277
+ # Use re.finditer to get an iterator of all hat block matches.
3278
+ # The `re.DOTALL` flag allows the '.' to match newlines.
3279
+ matches = list(re.finditer(delimiter_patterns, pseudocode_string, flags=re.DOTALL | re.IGNORECASE))
 
 
 
 
 
 
 
 
 
 
 
3280
 
 
 
 
 
3281
  scripts = []
3282
+ # If no matches are found, return an empty list.
3283
+ if not matches:
3284
+ return []
3285
+
3286
+ # Iterate through the matches to slice the original string.
3287
+ for i in range(len(matches)):
3288
+ start = matches[i].start()
3289
+ end = matches[i+1].start() if i + 1 < len(matches) else len(pseudocode_string)
3290
+
3291
+ # Slice the pseudocode string from the start of one match to the start
3292
+ # of the next, or to the end of the string.
3293
+ script = pseudocode_string[start:end]
3294
+ scripts.append(script.strip())
3295
+
3296
  return scripts
3297
 
3298
  def transform_logic_to_action_flow(source_data, description=""):
 
3381
  end
3382
  end
3383
  """
3384
+ print(pseudo_code)
3385
+ opcode_counts_result = analyze_opcode_counts(pseudo_code)
3386
+ generated_output_json, initial_opcode_occurrences = generate_blocks_from_opcodes(opcode_counts_result, all_block_definitions)
3387
+ all_generated_blocks = generate_plan(generated_output_json, initial_opcode_occurrences, pseudo_code)
3388
+ processed_blocks= process_scratch_blocks(all_generated_blocks, generated_output_json)
3389
+ renamed_blocks, renamed_counts = rename_blocks(processed_blocks, initial_opcode_occurrences)
3390
+ print(opcode_counts_result)
3391
+ print("--------------\n\n")
3392
+ print(processed_blocks)
3393
+ print("--------------\n\n")
3394
+ print(initial_opcode_occurrences)
3395
+ print("--------------\n\n")
3396
+ print(renamed_blocks)