adtyalan committed on
Commit
6f08448
·
verified ·
1 Parent(s): 52913f5

Ganti keseluruhan UI untuk menggunakan model tersedia text ranking

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +66 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,68 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
 
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ # Install paket yang dibutuhkan (jalankan ini di Colab atau lokal)
2
+ !pip install streamlit sentence-transformers transformers
3
+
4
+ import time
5
  import streamlit as st
6
+ import torch
7
+ from sentence_transformers import SentenceTransformer, CrossEncoder
8
+
9
+ # Load Bi-Encoder dan Cross-Encoder
10
+ bi_encoder = SentenceTransformer('sentence-transformers/msmarco-distilbert-base-v3')
11
+ cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
12
+
13
+ # UI dengan Streamlit
14
+ st.title("🔍 Perbandingan Bi-Encoder vs Cross-Encoder")
15
+ st.subheader("Masukkan Query dan Passages untuk melihat perbandingan ranking")
16
+
17
+ # Input untuk query dan passages
18
+ query = st.text_input("Masukkan Query:", "Apa manfaat AI dalam kehidupan sehari-hari?")
19
+ passages = st.text_area("Masukkan Passages (pisahkan dengan enter):",
20
+ "AI membantu mengoptimalkan pencarian informasi dan otomatisasi tugas.\n"
21
+ "Teknologi AI telah berkembang pesat dalam bidang kesehatan dan pendidikan.\n"
22
+ "AI digunakan untuk meningkatkan efisiensi industri seperti manufaktur dan e-commerce.")
23
+
24
+ # Konversi input passages ke list
25
+ passages = passages.split("\n")
26
+
27
+ if st.button("Jalankan Model"):
28
+ st.write("🚀 **Menganalisis passages dengan dua model...**")
29
+
30
+ # --- Bi-Encoder ---
31
+ start_time = time.time()
32
+ query_emb = bi_encoder.encode(query)
33
+ passage_embs = bi_encoder.encode(passages)
34
+ scores_bi = [torch.cosine_similarity(torch.tensor(query_emb), torch.tensor(p_emb), dim=0).item() for p_emb in passage_embs]
35
+ bi_time = time.time() - start_time # Waktu eksekusi Bi-Encoder
36
+
37
+ # --- Cross-Encoder ---
38
+ start_time = time.time()
39
+ scores_cross = cross_encoder.predict([[query, passage] for passage in passages])
40
+ cross_time = time.time() - start_time # Waktu eksekusi Cross-Encoder
41
+
42
+ # Hitung Mean Reciprocal Rank (MRR)
43
+ def compute_mrr(scores):
44
+ ranked_scores = sorted(scores, reverse=True)
45
+ if len(ranked_scores) == 0:
46
+ return 0.0
47
+ return 1 / (ranked_scores.index(max(scores)) + 1)
48
+
49
+ mrr_bi = compute_mrr(scores_bi)
50
+ mrr_cross = compute_mrr(scores_cross)
51
+
52
+ # **Tampilkan Hasil**
53
+ st.write("✅ **Hasil Ranking (Bi-Encoder)**")
54
+ sorted_bi = sorted(zip(passages, scores_bi), key=lambda x: x[1], reverse=True)
55
+ for i, (text, score) in enumerate(sorted_bi):
56
+ st.write(f"{i+1}. **[{score:.4f}]** {text}")
57
+
58
+ st.write("βœ… **Hasil Ranking (Cross-Encoder)**")
59
+ sorted_cross = sorted(zip(passages, scores_cross), key=lambda x: x[1], reverse=True)
60
+ for i, (text, score) in enumerate(sorted_cross):
61
+ st.write(f"{i+1}. **[{score:.4f}]** {text}")
62
 
63
+ # **Tampilkan metrik**
64
+ st.subheader("📊 Perbandingan Model")
65
+ st.write(f"⏱ **Waktu Eksekusi Bi-Encoder:** {bi_time:.3f} detik")
66
+ st.write(f"⏱ **Waktu Eksekusi Cross-Encoder:** {cross_time:.3f} detik")
67
+ st.write(f"📈 **MRR Bi-Encoder:** {mrr_bi:.3f}")
68
+ st.write(f"📈 **MRR Cross-Encoder:** {mrr_cross:.3f}")