Update app.py
Browse files
app.py
CHANGED
|
@@ -17,11 +17,13 @@ def get_client(model_name):
|
|
| 17 |
|
| 18 |
def analyze_file_content(content, file_type):
|
| 19 |
"""파일 내용을 분석하여 구조적 요약을 반환"""
|
| 20 |
-
if file_type
|
| 21 |
try:
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
return f"데이터셋 구조: {columns}개 컬럼, {rows}개 데이터 샘플"
|
| 26 |
except:
|
| 27 |
return "데이터셋 구조 분석 실패"
|
|
@@ -31,14 +33,12 @@ def analyze_file_content(content, file_type):
|
|
| 31 |
total_lines = len(lines)
|
| 32 |
non_empty_lines = len([line for line in lines if line.strip()])
|
| 33 |
|
| 34 |
-
# 코드 파일 특징 분석
|
| 35 |
if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
|
| 36 |
functions = len([line for line in lines if 'def ' in line])
|
| 37 |
classes = len([line for line in lines if 'class ' in line])
|
| 38 |
imports = len([line for line in lines if 'import ' in line or 'from ' in line])
|
| 39 |
return f"코드 구조 분석: 총 {total_lines}줄 (함수 {functions}개, 클래스 {classes}개, 임포트 {imports}개)"
|
| 40 |
|
| 41 |
-
# 일반 텍스트 문서 분석
|
| 42 |
paragraphs = content.count('\n\n') + 1
|
| 43 |
words = len(content.split())
|
| 44 |
return f"문서 구조 분석: 총 {total_lines}줄, {paragraphs}개 문단, 약 {words}개 단어"
|
|
@@ -47,14 +47,23 @@ def read_uploaded_file(file):
|
|
| 47 |
if file is None:
|
| 48 |
return "", ""
|
| 49 |
try:
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
df = pd.read_parquet(file.name, engine='pyarrow')
|
| 52 |
content = df.head(10).to_markdown(index=False)
|
| 53 |
return content, "parquet"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
else:
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
content = content.decode('utf-8')
|
| 58 |
return content, "text"
|
| 59 |
except Exception as e:
|
| 60 |
return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}", "error"
|
|
@@ -73,8 +82,10 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
|
|
| 73 |
1. 파일의 전반적인 구조와 구성
|
| 74 |
2. 주요 내용과 패턴 분석
|
| 75 |
3. 데이터의 특징과 의미
|
|
|
|
|
|
|
| 76 |
4. 잠재적 활용 방안
|
| 77 |
-
5.
|
| 78 |
|
| 79 |
전문가적 관점에서 상세하고 구조적인 분석을 제공하되, 이해하기 쉽게 설명하세요. 분석 결과는 Markdown 형식으로 작성하고, 가능한 한 구체적인 예시를 포함하세요."""
|
| 80 |
|
|
@@ -87,7 +98,7 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
|
|
| 87 |
# 파일 내용 분석 및 구조적 요약
|
| 88 |
file_summary = analyze_file_content(content, file_type)
|
| 89 |
|
| 90 |
-
if file_type
|
| 91 |
system_message += f"\n\n파일 내용:\n```markdown\n{content}\n```"
|
| 92 |
else:
|
| 93 |
system_message += f"\n\n파일 내용:\n```\n{content}\n```"
|
|
@@ -152,8 +163,8 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
|
|
| 152 |
)
|
| 153 |
|
| 154 |
file_upload = gr.File(
|
| 155 |
-
label="파일 업로드 (텍스트, 코드,
|
| 156 |
-
file_types=["text", ".parquet"],
|
| 157 |
type="filepath"
|
| 158 |
)
|
| 159 |
|
|
|
|
| 17 |
|
| 18 |
def analyze_file_content(content, file_type):
|
| 19 |
"""파일 내용을 분석하여 구조적 요약을 반환"""
|
| 20 |
+
if file_type in ['parquet', 'csv']:
|
| 21 |
try:
|
| 22 |
+
# 데이터셋 구조 분석
|
| 23 |
+
lines = content.split('\n')
|
| 24 |
+
header = lines[0]
|
| 25 |
+
columns = header.count('|') - 1
|
| 26 |
+
rows = len(lines) - 3 # 헤더와 구분선 제외
|
| 27 |
return f"데이터셋 구조: {columns}개 컬럼, {rows}개 데이터 샘플"
|
| 28 |
except:
|
| 29 |
return "데이터셋 구조 분석 실패"
|
|
|
|
| 33 |
total_lines = len(lines)
|
| 34 |
non_empty_lines = len([line for line in lines if line.strip()])
|
| 35 |
|
|
|
|
| 36 |
if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
|
| 37 |
functions = len([line for line in lines if 'def ' in line])
|
| 38 |
classes = len([line for line in lines if 'class ' in line])
|
| 39 |
imports = len([line for line in lines if 'import ' in line or 'from ' in line])
|
| 40 |
return f"코드 구조 분석: 총 {total_lines}줄 (함수 {functions}개, 클래스 {classes}개, 임포트 {imports}개)"
|
| 41 |
|
|
|
|
| 42 |
paragraphs = content.count('\n\n') + 1
|
| 43 |
words = len(content.split())
|
| 44 |
return f"문서 구조 분석: 총 {total_lines}줄, {paragraphs}개 문단, 약 {words}개 단어"
|
|
|
|
| 47 |
if file is None:
|
| 48 |
return "", ""
|
| 49 |
try:
|
| 50 |
+
file_ext = os.path.splitext(file.name)[1].lower()
|
| 51 |
+
|
| 52 |
+
if file_ext == '.parquet':
|
| 53 |
df = pd.read_parquet(file.name, engine='pyarrow')
|
| 54 |
content = df.head(10).to_markdown(index=False)
|
| 55 |
return content, "parquet"
|
| 56 |
+
elif file_ext == '.csv':
|
| 57 |
+
df = pd.read_csv(file.name)
|
| 58 |
+
content = f"데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n"
|
| 59 |
+
content += f"\n데이터 정보:\n"
|
| 60 |
+
content += f"- 총 행 수: {len(df)}\n"
|
| 61 |
+
content += f"- 총 열 수: {len(df.columns)}\n"
|
| 62 |
+
content += f"- 컬럼 목록: {', '.join(df.columns)}\n"
|
| 63 |
+
return content, "csv"
|
| 64 |
else:
|
| 65 |
+
with open(file.name, 'r', encoding='utf-8') as f:
|
| 66 |
+
content = f.read()
|
|
|
|
| 67 |
return content, "text"
|
| 68 |
except Exception as e:
|
| 69 |
return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}", "error"
|
|
|
|
| 82 |
1. 파일의 전반적인 구조와 구성
|
| 83 |
2. 주요 내용과 패턴 분석
|
| 84 |
3. 데이터의 특징과 의미
|
| 85 |
+
- 데이터셋의 경우: 컬럼의 의미, 데이터 타입, 값의 분포
|
| 86 |
+
- 텍스트/코드의 경우: 구조적 특징, 주요 패턴
|
| 87 |
4. 잠재적 활용 방안
|
| 88 |
+
5. 데이터 품질 및 개선 가능한 부분
|
| 89 |
|
| 90 |
전문가적 관점에서 상세하고 구조적인 분석을 제공하되, 이해하기 쉽게 설명하세요. 분석 결과는 Markdown 형식으로 작성하고, 가능한 한 구체적인 예시를 포함하세요."""
|
| 91 |
|
|
|
|
| 98 |
# 파일 내용 분석 및 구조적 요약
|
| 99 |
file_summary = analyze_file_content(content, file_type)
|
| 100 |
|
| 101 |
+
if file_type in ['parquet', 'csv']:
|
| 102 |
system_message += f"\n\n파일 내용:\n```markdown\n{content}\n```"
|
| 103 |
else:
|
| 104 |
system_message += f"\n\n파일 내용:\n```\n{content}\n```"
|
|
|
|
| 163 |
)
|
| 164 |
|
| 165 |
file_upload = gr.File(
|
| 166 |
+
label="파일 업로드 (텍스트, 코드, CSV, Parquet 파일)",
|
| 167 |
+
file_types=["text", ".csv", ".parquet"],
|
| 168 |
type="filepath"
|
| 169 |
)
|
| 170 |
|