Spaces:
Runtime error
Runtime error
Ganti keseluruhan UI untuk menggunakan model text ranking yang tersedia
Browse files
- src/streamlit_app.py +66 -38
src/streamlit_app.py
CHANGED
|
@@ -1,40 +1,68 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
| 4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
| 17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
| 1 |
+
# Install the required packages before launching the app (Colab cell or local shell):
#
#     pip install streamlit sentence-transformers transformers
#
# The command is kept as a comment on purpose: the `!pip ...` form is
# IPython/Colab shell magic and is a SyntaxError when this file is executed as
# a plain Python script (e.g. via `streamlit run`).
# NOTE(review): on a hosted deployment these packages presumably belong in the
# project's dependency manifest (e.g. requirements.txt) — confirm.
|
| 3 |
+
|
| 4 |
+
import time
|
| 5 |
import streamlit as st
|
| 6 |
+
import torch
|
| 7 |
+
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 8 |
+
|
| 9 |
+
@st.cache_resource
def _load_models():
    """Build the bi-encoder and cross-encoder once and reuse them across reruns.

    Constructing the models at module top level repeats the (expensive) load
    every time the script is executed; caching the loader keeps a single
    shared instance of each model instead.

    Returns:
        A ``(bi_encoder, cross_encoder)`` tuple.
    """
    bi_model = SentenceTransformer('sentence-transformers/msmarco-distilbert-base-v3')
    cross_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    return bi_model, cross_model


# Module-level names are kept so the rest of the script can use them unchanged.
bi_encoder, cross_encoder = _load_models()
|
| 12 |
+
|
| 13 |
+
# Page header.
st.title("π Perbandingan Bi-Encoder vs Cross-Encoder")
st.subheader("Masukkan Query dan Passages untuk melihat perbandingan ranking")

# Pre-filled example passages, one per line of the text area.
_default_passage_text = "\n".join(
    (
        "AI membantu mengoptimalkan pencarian informasi dan otomatisasi tugas.",
        "Teknologi AI telah berkembang pesat dalam bidang kesehatan dan pendidikan.",
        "AI digunakan untuk meningkatkan efisiensi industri seperti manufaktur dan e-commerce.",
    )
)

# Query and passage inputs.
query = st.text_input(
    "Masukkan Query:",
    value="Apa manfaat AI dalam kehidupan sehari-hari?",
)
_passage_text = st.text_area(
    "Masukkan Passages (pisahkan dengan enter):",
    value=_default_passage_text,
)

# One list entry per line of the text area (blank lines are kept as-is).
passages = _passage_text.split("\n")
|
| 26 |
+
|
| 27 |
+
def compute_mrr(scores):
    """Return the reciprocal rank of the highest score in ``scores``.

    Args:
        scores: Sequence of numeric relevance scores, one per passage.

    Returns:
        ``0.0`` for an empty sequence, otherwise ``1 / rank`` of the maximum
        score within the descending ordering of ``scores``.

    NOTE(review): no relevance labels are supplied, so the item treated as
    "relevant" is the top-scored one, which by construction sits at rank 1.
    For any non-empty sequence of comparable scores this therefore returns
    1.0 and does not discriminate between models. A meaningful MRR needs a
    ground-truth relevant passage per query, which this UI does not collect.
    """
    if len(scores) == 0:
        return 0.0
    ranked_scores = sorted(scores, reverse=True)
    return 1 / (ranked_scores.index(max(scores)) + 1)


def _show_ranking(heading, texts, scores):
    """Write ``heading`` followed by ``texts`` ordered by descending score."""
    st.write(heading)
    ranked = sorted(zip(texts, scores), key=lambda pair: pair[1], reverse=True)
    for position, (text, score) in enumerate(ranked, start=1):
        st.write(f"{position}. **[{score:.4f}]** {text}")


if st.button("Jalankan Model"):
    # Blank lines (e.g. a trailing newline in the text area) are not passages;
    # drop them so they are neither scored nor shown in the rankings.
    candidates = [line.strip() for line in passages if line.strip()]

    if not query.strip() or not candidates:
        # Nothing meaningful to rank; tell the user instead of calling the models.
        st.warning("Masukkan query dan minimal satu passage.")
    else:
        # NOTE(review): the leading "π" in several labels below looks like a
        # mis-decoded emoji; the intended glyph cannot be recovered from the
        # text, so it is kept as-is — confirm against the original file.
        st.write("π **Menganalisis passages dengan dua model...**")

        # --- Bi-Encoder ---
        # perf_counter is the clock intended for measuring elapsed intervals.
        start_time = time.perf_counter()
        query_emb = torch.as_tensor(bi_encoder.encode(query))
        passage_embs = torch.as_tensor(bi_encoder.encode(candidates))
        # One vectorised cosine similarity of the query against every passage
        # row, instead of rebuilding tensors for each passage in a Python loop.
        scores_bi = torch.cosine_similarity(
            passage_embs, query_emb.unsqueeze(0), dim=1
        ).tolist()
        bi_time = time.perf_counter() - start_time  # Bi-Encoder wall time

        # --- Cross-Encoder ---
        start_time = time.perf_counter()
        scores_cross = cross_encoder.predict(
            [[query, passage] for passage in candidates]
        )
        cross_time = time.perf_counter() - start_time  # Cross-Encoder wall time

        # See compute_mrr's docstring: without labels these are always 1.0.
        mrr_bi = compute_mrr(scores_bi)
        mrr_cross = compute_mrr(scores_cross)

        # Rankings.
        _show_ranking("✅ **Hasil Ranking (Bi-Encoder)**", candidates, scores_bi)
        _show_ranking("✅ **Hasil Ranking (Cross-Encoder)**", candidates, scores_cross)

        # Metrics.
        st.subheader("π Perbandingan Model")
        st.write(f"⏱ **Waktu Eksekusi Bi-Encoder:** {bi_time:.3f} detik")
        st.write(f"⏱ **Waktu Eksekusi Cross-Encoder:** {cross_time:.3f} detik")
        st.write(f"π **MRR Bi-Encoder:** {mrr_bi:.3f}")
        st.write(f"π **MRR Cross-Encoder:** {mrr_cross:.3f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|