aliosm committed on
Commit
e246915
·
1 Parent(s): 6e4390a

Initial space app

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. README.md +6 -6
  3. app.py +279 -0
  4. index.tsv +3 -0
  5. requirements.txt +4 -0
.gitattributes CHANGED
@@ -1,3 +1,4 @@
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
1
+ index.tsv filter=lfs diff=lfs merge=lfs -text
2
  *.7z filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
2
- title: ProphetMosque
3
- emoji: 🔥
4
- colorFrom: pink
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.29.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- short_description: ابحث في كتب مكتبة المسجد النبوي
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Prophet's Mosque
3
+ emoji: 🕌
4
+ colorFrom: blue
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.23.2
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ short_description: ابحث في كتب مكتبة المسجد النبوي 🕌
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import json
3
+ import urllib.parse
4
+ import requests
5
+ import tempfile
6
+ import zipfile
7
+ import os
8
+ import re
9
+ import threading
10
+
11
+ from pathlib import Path
12
+
13
+ import gradio as gr
14
+
15
+ from fuzzywuzzy import fuzz
16
+ from pyarabic.araby import strip_tashkeel
17
+
18
+
19
def main() -> None:
    """Build the Gradio search UI for the library index and launch the app.

    The page has a title box, optional category/author dropdowns, a results
    table, and three download buttons (PDF/TXT/DOCX) that appear once a
    result row is selected.
    """
    # Right-to-left layout tweaks for the Arabic interface.
    rtl_css = '\n'.join([
        'html, body, .gradio-container { direction: rtl !important; }',
        'h1 { text-align: center; display: block; }',
        'th, td { text-align: right !important; }',
        'th span { white-space: nowrap !important; }',
        '.icon-wrap { right: unset !important; left: var(--size-3) !important; }',
    ])
    arabic_theme = gr.themes.Default(
        font=[gr.themes.GoogleFont('Noto Sans Arabic'), 'Arial', 'sans-serif'],
    )

    with gr.Blocks(theme=arabic_theme, css=rtl_css) as demo:
        # Per-session state: the loaded index, the rows currently shown in the
        # results table, and the id of the row the user selected.
        index_state = gr.State()
        results_data = gr.State()
        selected_book_id = gr.State()

        gr.Markdown('# ابحث في كتب مكتبة المسجد النبوي 📚', rtl=True)

        title = gr.Textbox(label='عنوان الكتاب', placeholder='اكتب عنوان الكتاب', lines=1, rtl=True)

        with gr.Row():
            # Both dropdowns start disabled with a "loading" placeholder and
            # are filled in by load_data once the page loads.
            category = gr.Dropdown(choices=['جارٍ التحميل...'], label='التصنيف (اختياري)', interactive=False)
            author = gr.Dropdown(choices=['جارٍ التحميل...'], label='المؤلف (اختياري)', interactive=False)

        search_button = gr.Button('ابحث')

        gr.Markdown('## النتائج 🎯', rtl=True)

        results = gr.Dataframe(
            headers=[
                '#',
                'العنوان',
                'الصفحات',
                'المؤلف',
                'التصنيف',
                'درجة التطابق',
            ],
            interactive=False,
        )

        download_label = gr.Markdown('### تحميل ملفات الكتاب 📥', visible=False, rtl=True)

        with gr.Row(visible=False) as download_buttons_box:
            download_pdf_button = gr.Button('تحميل بصيغة PDF')
            download_txt_button = gr.Button('تحميل بصيغة TXT')
            download_docx_button = gr.Button('تحميل بصيغة DOCX')

        download_output = gr.File(label='تنزيل الملف 📥', visible=False)

        downloading_text = gr.Markdown('جارٍ تجهيز الملف للتحميل...', visible=False, rtl=True)

        demo.load(load_data, outputs=[index_state, category, author])

        search_button.click(
            fn=lambda query, chosen_category, chosen_author, loaded_index: handle_search(
                loaded_index, query, chosen_category, chosen_author
            ),
            inputs=[title, category, author, index_state],
            outputs=[results, results_data, download_buttons_box, download_label, download_output, downloading_text],
        )

        results.select(
            fn=show_details,
            inputs=[results_data],
            outputs=[download_label, selected_book_id, download_buttons_box],
        )

        # Every format button runs the same three-step chain: show the
        # "preparing" note, fetch the file, then swap the note for the file.
        format_buttons = (
            (download_pdf_button, 'pdf'),
            (download_txt_button, 'txt'),
            (download_docx_button, 'docx'),
        )
        for button, file_type in format_buttons:
            button.click(
                lambda: gr.update(visible=True), inputs=None, outputs=downloading_text
            ).then(
                fn=download_book,
                inputs=[index_state, selected_book_id, gr.State(file_type)],
                outputs=download_output,
            ).then(
                lambda: (gr.update(visible=False), gr.update(visible=True)),
                inputs=None,
                outputs=[downloading_text, download_output],
            )

    demo.launch()
120
+
121
+
122
def load_data() -> tuple[list[list[str | int]], gr.Dropdown, gr.Dropdown]:
    """Load the index and build the updates that enable both dropdowns.

    Returns:
        A 3-tuple of the loaded index rows, an update for the category
        dropdown, and an update for the author dropdown. Each dropdown is made
        interactive and preselects the first entry of its choice list.
    """
    index = load_index()
    category_choices = get_categories(index)
    author_choices = get_authors(index)

    category_update = gr.update(choices=category_choices, value=category_choices[0], interactive=True)
    author_update = gr.update(choices=author_choices, value=author_choices[0], interactive=True)

    return index, category_update, author_update
132
+
133
+
134
def load_index() -> list[list[str | int]]:
    """Read ``index.tsv`` and return its rows prepared for searching.

    The first line of the file is treated as a header and skipped. Each
    remaining row is returned as ``[id, *columns, normalized]`` where ``id`` is
    the 1-based position of the row in the returned list and ``normalized`` is
    ``normalize_text`` applied to the row's third column.

    Returns:
        The list of augmented rows, in file order.
    """
    # newline='' is what the csv module asks for, so that line endings inside
    # quoted fields are handled by the reader rather than by the file object.
    with open('index.tsv', 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file, delimiter='\t')
        next(reader, None)  # skip the header line (no-op on an empty file)
        # csv.reader yields [] for blank lines; drop them so row[2] below
        # cannot fail on a stray empty line.
        rows = [row for row in reader if row]

    # Ids are assigned after filtering so that id - 1 is always the row's
    # position in the returned list.
    return [
        [position, *row, normalize_text(row[2])]
        for position, row in enumerate(rows, start=1)
    ]
142
+
143
+
144
+ def get_categories(index: list[list[str | int]]) -> list[str]:
145
+ return [''] + sorted(set([row[1] for row in index if row[1]]))
146
+
147
+
148
+ def get_authors(index: list[list[str | int]]) -> list[str]:
149
+ return [''] + sorted(set([row[2] for row in index if row[2]]))
150
+
151
+
152
def handle_search(
    index: list[list[str | int]],
    title: str,
    category: str,
    author: str,
) -> list:
    """Fuzzy-search the index by title, optionally filtered by category/author.

    Args:
        index: Index rows; column 1 is compared with ``category``, column 2
            with ``author``, and the last column is matched against the
            normalized query. May be ``None`` if the index is not loaded yet.
        title: The query typed by the user.
        category: Exact category to keep, or empty for no filter.
        author: Exact author to keep, or empty for no filter.

    Returns:
        A 6-item list: the table rows to display, the same rows for the
        results state, and four ``visible=False`` updates that hide the
        download controls left over from a previous search.
    """
    min_score = 50      # matches must score strictly above this
    max_results = 100   # cap on the number of rows shown

    # Four independent updates, one per download-related output component.
    hide_controls = [gr.update(visible=False) for _ in range(4)]

    def message_only(message: str) -> list:
        # One placeholder row, padded to the table's six columns.
        table = [['', message, '', '', '', '']]
        return [table, table, *hide_controls]

    # `title or ''` keeps a missing query from crashing normalize_text.
    title = normalize_text(title or '')

    if not title.strip():
        return message_only('يرجى إدخال عنوان للبحث.')

    # The index state is empty until the page-load handler finishes; treat
    # that as "no books" instead of failing on iteration.
    candidates = index or []

    if category:
        candidates = [row for row in candidates if row[1] == category]

    if author:
        candidates = [row for row in candidates if row[2] == author]

    scored_results = []
    for row in candidates:
        score = fuzz.partial_ratio(title, row[-1])

        if score > min_score:
            scored_results.append((score, row))

    if not scored_results:
        return message_only('لم يتم العثور على نتائج مطابقة.')

    # Highest score first; equal scores fall back to comparing the rows,
    # whose first element is the row id.
    scored_results.sort(reverse=True)

    # Display order: id, column 3, column 4, author, category, score.
    result_table = [
        [row[0], row[3], row[4], row[2], row[1], score]
        for score, row in scored_results[:max_results]
    ]

    return [result_table, result_table, *hide_controls]
205
+
206
+
207
def show_details(evt: gr.SelectData, results_data: list[list[str | int]]) -> list:
    """Reveal the download controls for the result row the user selected.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the row position.
        results_data: The rows currently shown in the results table.

    Returns:
        A 3-item list: visibility update for the download label, the selected
        book id (first cell of the row), and visibility update for the button
        row. When the selected row has no integer id (for example the
        single-row "no results" message), the controls stay hidden and the id
        is ``None``.
    """
    row_index = evt.index[0]

    book_id = None
    if results_data and 0 <= row_index < len(results_data):
        book_id = results_data[row_index][0]

    # Placeholder message rows carry '' in the id cell; offering downloads for
    # them would only fail later when the id is used as a list position.
    if not isinstance(book_id, int):
        return [gr.update(visible=False), None, gr.update(visible=False)]

    return [gr.update(visible=True), book_id, gr.update(visible=True)]
209
+
210
+
211
def download_book(index_state: list[list[str | int]], selected_book_id: int, file_type: str) -> str:
    """Download the selected book in the requested format to a temp file.

    Args:
        index_state: Loaded index rows. Columns 5, 6 and 7 hold the serialized
            path lists for pdf, txt and docx respectively.
        selected_book_id: 1-based id of the book (its position in
            ``index_state`` plus one).
        file_type: One of ``'pdf'``, ``'txt'`` or ``'docx'``.

    Returns:
        Path of the downloaded file, or of a zip archive when the book has
        several files for that format. The returned file is scheduled for
        deletion shortly after this function returns.

    Raises:
        gr.Error: If the book has no file in that format or a download fails.
        KeyError: If ``file_type`` is not one of the supported formats.
    """
    import ast  # local import: the only place this module is needed

    base_url = 'https://huggingface.co/datasets/ieasybooks-org/prophet-mosque-library/resolve/main/'

    def parse_paths(raw: str):
        # literal_eval reads a Python-style list without the quote swapping,
        # so paths that contain an apostrophe or a double quote survive.
        if not raw.strip():
            return []
        try:
            return ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            # Fall back to the previous behavior for values that are not
            # Python literals.
            return json.loads(raw.replace("'", '"'))

    def fetch(path: str) -> bytes:
        # path[2:] drops the first two characters of the stored path
        # (presumably a leading './' — verify against the index data).
        url = base_url + urllib.parse.quote(path[2:])
        try:
            response = requests.get(url, timeout=60)
            # Without this, an HTTP error page would be saved as the book.
            response.raise_for_status()
        except requests.RequestException as error:
            raise gr.Error('الملف غير متوفر') from error
        return response.content

    selected_book = index_state[selected_book_id - 1]
    title = f'{selected_book[3]} - {selected_book[2]}'
    paths_column = 5 + {'pdf': 0, 'txt': 1, 'docx': 2}[file_type]
    file_paths = parse_paths(selected_book[paths_column])

    if not isinstance(file_paths, (list, tuple)) or not file_paths:
        # gr.Error subclasses Exception, so existing handlers still catch it,
        # and Gradio shows its message to the user.
        raise gr.Error('الملف غير متوفر')

    safe_title = sanitize_filename(title)

    if len(file_paths) == 1:
        content = fetch(file_paths[0])
        ext = Path(file_paths[0]).suffix
        temp_file_name = os.path.join(tempfile.gettempdir(), f'{safe_title}{ext}')

        with open(temp_file_name, 'wb') as temp_file:
            temp_file.write(content)

        schedule_file_deletion(temp_file_name)

        return temp_file_name

    # NOTE: only the zip file is scheduled for deletion; the directory
    # created here is left behind empty.
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, f'{safe_title}.zip')

    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for path in file_paths:
            # Write the bytes straight into the archive so no per-file
            # temporary copies remain on disk.
            zipf.writestr(os.path.basename(path), fetch(path))

    schedule_file_deletion(zip_path)

    return zip_path
252
+
253
+
254
def schedule_file_deletion(file_path: str, delay: float = 3) -> threading.Timer:
    """Delete ``file_path`` on a background timer after ``delay`` seconds.

    Args:
        file_path: Path of the file to remove.
        delay: Seconds to wait before removing the file.

    Returns:
        The started timer, so a caller may ``cancel()`` or ``join()`` it.
        Callers that ignore the return value behave as before.
    """
    def delete_file() -> None:
        # Remove directly instead of checking existence first: the file can
        # disappear between the check and the removal.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass  # already gone, nothing to clean up

    timer = threading.Timer(delay, delete_file)
    timer.start()

    return timer
260
+
261
+
262
def sanitize_filename(name: str, max_bytes: int = 200) -> str:
    """Return ``name`` made safe to use as a single file name.

    Removes the characters ``\\ / : " * ? < > |`` and ASCII control characters
    (including NUL), trims surrounding whitespace, and caps the UTF-8 encoded
    length.

    Args:
        name: The proposed file name, without extension.
        max_bytes: Maximum UTF-8 length of the result. The default leaves room
            for an extension under the 255-byte name limit common to many
            file systems; Arabic letters take two bytes each, so long
            "title - author" names can otherwise exceed it.

    Returns:
        The cleaned name; may be empty if nothing usable remains.
    """
    cleaned = re.sub(r'[\\/:"*?<>|\x00-\x1f]+', '', name).strip()

    encoded = cleaned.encode('utf-8')
    if len(encoded) > max_bytes:
        # errors='ignore' drops a multi-byte character cut in half by the slice.
        cleaned = encoded[:max_bytes].decode('utf-8', errors='ignore').strip()

    return cleaned
264
+
265
+
266
def normalize_text(text: str) -> str:
    """Return ``text`` reduced to a canonical form for fuzzy matching.

    Applies ``strip_tashkeel`` and then unifies letter variants: the three
    hamza/madda forms of alef become bare alef, yeh becomes alef maksura, and
    teh marbuta becomes heh.
    """
    letter_variants = (
        ('أ', 'ا'),
        ('إ', 'ا'),
        ('آ', 'ا'),
        ('ي', 'ى'),
        ('ة', 'ه'),
    )

    normalized = strip_tashkeel(text)
    for variant, canonical in letter_variants:
        normalized = normalized.replace(variant, canonical)

    return normalized
276
+
277
+
278
# Start the app only when this file is executed as a script.
if __name__ == '__main__':
    main()
index.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4775f76e0b050110247b65c8b2ec7933204ec95e79ee8726111f8f4c72f5bc9
3
+ size 51575132
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ PyArabic==0.6.15
2
+ fuzzywuzzy==0.18.0
3
+ gradio==5.23.2
4
+ python-Levenshtein==0.27.1