Spaces:
Paused
Paused
Ok2
Browse files
app.py
CHANGED
|
@@ -53,7 +53,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
| 53 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
| 54 |
|
| 55 |
### FIND ZONES OF INTEREST
|
| 56 |
-
threshold_per_token = 0.
|
| 57 |
kernel_width = 6
|
| 58 |
context_width = 20 # Number of tokens to include as context on each side
|
| 59 |
kernel = np.ones((kernel_width, kernel_width))
|
|
@@ -85,24 +85,22 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
| 85 |
output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
|
| 86 |
for row in range(kernel_width, len(generated_tokens)):
|
| 87 |
best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
|
| 88 |
-
|
| 89 |
if best_width is not None:
|
| 90 |
output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
|
| 91 |
else:
|
| 92 |
output_with_notes.append((generated_tokens[row], None))
|
| 93 |
|
| 94 |
-
|
| 95 |
# Fuse the notes for consecutive output tokens if necessary
|
| 96 |
for i in range(len(output_with_notes)):
|
| 97 |
token, coords = output_with_notes[i]
|
| 98 |
if coords is not None:
|
| 99 |
best_width, best_patch_start = coords
|
| 100 |
note_width_generated = kernel_width
|
| 101 |
-
for next_id in
|
| 102 |
next_token, next_coords = output_with_notes[next_id]
|
| 103 |
if next_coords is not None:
|
| 104 |
next_width, next_patch_start = next_coords
|
| 105 |
-
if best_patch_start + best_width
|
| 106 |
# then notes are overlapping: thus we delete the last one and make the first wider if needed
|
| 107 |
output_with_notes[next_id] = (next_token, None)
|
| 108 |
larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
|
|
@@ -112,6 +110,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
| 112 |
else:
|
| 113 |
output_with_notes[i] = (token, None, None)
|
| 114 |
|
|
|
|
| 115 |
for i, (token, coords, width) in enumerate(output_with_notes):
|
| 116 |
if coords is not None:
|
| 117 |
best_width, best_patch_start = coords
|
|
@@ -122,8 +121,6 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
| 122 |
first_part = "".join(input_tokens[context_start:significant_start])
|
| 123 |
significant_part = "".join(input_tokens[significant_start:significant_end])
|
| 124 |
final_part = "".join(input_tokens[significant_end:context_end])
|
| 125 |
-
print("KK", first_part, significant_part, final_part)
|
| 126 |
-
|
| 127 |
output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
|
| 128 |
|
| 129 |
return output_with_notes
|
|
@@ -136,7 +133,7 @@ def create_html_with_hover(output_with_notes):
|
|
| 136 |
(token, notes, width) = output_with_notes[i]
|
| 137 |
if notes is None:
|
| 138 |
html += f'{token}'
|
| 139 |
-
i +=1
|
| 140 |
else:
|
| 141 |
text = "".join([element[0] for element in output_with_notes[i:i+width]])
|
| 142 |
first_part, significant_part, final_part = notes
|
|
@@ -144,7 +141,7 @@ def create_html_with_hover(output_with_notes):
|
|
| 144 |
html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
|
| 145 |
html += f'<span class="hover-note">{formatted_note}</span></span>'
|
| 146 |
note_number += 1
|
| 147 |
-
i+=width
|
| 148 |
html += "</div>"
|
| 149 |
return html
|
| 150 |
|
|
|
|
| 53 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
| 54 |
|
| 55 |
### FIND ZONES OF INTEREST
|
| 56 |
+
threshold_per_token = 0.22
|
| 57 |
kernel_width = 6
|
| 58 |
context_width = 20 # Number of tokens to include as context on each side
|
| 59 |
kernel = np.ones((kernel_width, kernel_width))
|
|
|
|
| 85 |
output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
|
| 86 |
for row in range(kernel_width, len(generated_tokens)):
|
| 87 |
best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
|
|
|
|
| 88 |
if best_width is not None:
|
| 89 |
output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
|
| 90 |
else:
|
| 91 |
output_with_notes.append((generated_tokens[row], None))
|
| 92 |
|
|
|
|
| 93 |
# Fuse the notes for consecutive output tokens if necessary
|
| 94 |
for i in range(len(output_with_notes)):
|
| 95 |
token, coords = output_with_notes[i]
|
| 96 |
if coords is not None:
|
| 97 |
best_width, best_patch_start = coords
|
| 98 |
note_width_generated = kernel_width
|
| 99 |
+
for next_id in range(i+1, min(i+2*kernel_width, len(output_with_notes))):
|
| 100 |
next_token, next_coords = output_with_notes[next_id]
|
| 101 |
if next_coords is not None:
|
| 102 |
next_width, next_patch_start = next_coords
|
| 103 |
+
if best_patch_start + best_width >= next_patch_start:
|
| 104 |
# then notes are overlapping: thus we delete the last one and make the first wider if needed
|
| 105 |
output_with_notes[next_id] = (next_token, None)
|
| 106 |
larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
|
|
|
|
| 110 |
else:
|
| 111 |
output_with_notes[i] = (token, None, None)
|
| 112 |
|
| 113 |
+
# Convert to text slices
|
| 114 |
for i, (token, coords, width) in enumerate(output_with_notes):
|
| 115 |
if coords is not None:
|
| 116 |
best_width, best_patch_start = coords
|
|
|
|
| 121 |
first_part = "".join(input_tokens[context_start:significant_start])
|
| 122 |
significant_part = "".join(input_tokens[significant_start:significant_end])
|
| 123 |
final_part = "".join(input_tokens[significant_end:context_end])
|
|
|
|
|
|
|
| 124 |
output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
|
| 125 |
|
| 126 |
return output_with_notes
|
|
|
|
| 133 |
(token, notes, width) = output_with_notes[i]
|
| 134 |
if notes is None:
|
| 135 |
html += f'{token}'
|
| 136 |
+
i += 1
|
| 137 |
else:
|
| 138 |
text = "".join([element[0] for element in output_with_notes[i:i+width]])
|
| 139 |
first_part, significant_part, final_part = notes
|
|
|
|
| 141 |
html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
|
| 142 |
html += f'<span class="hover-note">{formatted_note}</span></span>'
|
| 143 |
note_number += 1
|
| 144 |
+
i += width
|
| 145 |
html += "</div>"
|
| 146 |
return html
|
| 147 |
|