Spaces:

ParScale
/

Parallel_Scaling_Law

Sleeping

App Files Community

chenmouxiang committed on May 16

Commit

0cd46d4

verified ·

1 Parent(s): 7b9ce4e

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -12

app.py CHANGED Viewed

@@ -50,12 +50,12 @@ def generate_plot(E, A, k, alpha):
     return plt
-OUTPUT_TEMPLATE = """Loss for a {n}B model when P={p} is: **{loss}**. It is equivalant to:
-- A {n1}B model with P=1;
-- A {n2}B model with P=2;
-- A {n4}B model with P=4;
-- A {n8}B model with P=8;
 Note: The equivalent parameters are for reference only. In some reasoning tasks, scaling the parallel streams will obtain more performance gains than the loss benefits!
@@ -77,13 +77,12 @@ def process_inputs(E, A, k, alpha, n, p):
 # Create interface
-HEAD = """# Parallel Scaling Law Visualization
-$$
-\\text{Loss}=E+\\left(
-    \\frac{A}{\\text{Parameters}\\times (1+k\\log P)}
-\\right)^{\\alpha}
-$$
 """
 with gr.Blocks() as demo:
@@ -91,6 +90,12 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             # Input values
             N = gr.Number(value=2.8, label="N: Number of Non-Embedding Model Parameters (in Billion)")
@@ -111,11 +116,12 @@ with gr.Blocks() as demo:
             param_k = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['k'], label="k")
             param_alpha = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['alpha'], label="alpha")
-            submit_btn = gr.Button("Estimate Loss and Equivalant Model Parameters")
         plot, output = process_inputs(PARAM_SETS["Stack-V2-Python"]['E'], PARAM_SETS["Stack-V2-Python"]['A'], PARAM_SETS["Stack-V2-Python"]['k'], PARAM_SETS["Stack-V2-Python"]['alpha'], 2.8, 4)
         with gr.Column():
             # Output section
             plot_output = gr.Plot(label="Scaling Law Curve", value=plot)
             result_output = gr.Markdown(label="Result", value=output)

     return plt
+OUTPUT_TEMPLATE = """Loss for a {n}B model when P={p} is: **{loss:.5f}**. It is equivalant to:
+- A **{n1}B** model with **P=1**;
+- A **{n2}B** model with **P=2**;
+- A **{n4}B** model with **P=4**;
+- A **{n8}B** model with **P=8**;
 Note: The equivalent parameters are for reference only. In some reasoning tasks, scaling the parallel streams will obtain more performance gains than the loss benefits!
 # Create interface
+HEAD = """<div align="center">
+# Parallel Scaling Law Visualization
+[![Paper](https://img.shields.io/badge/arXiv-2505.10475-red)](https://arxiv.org/abs/2505.10475)
+</div>
 """
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
+            gr.Markdown("""$$
+\\text{Loss}=E+\\left(
+    \\frac{A}{\\text{Parameters}\\times (1+k\\log P)}
+\\right)^{\\alpha}
+$$""")
             # Input values
             N = gr.Number(value=2.8, label="N: Number of Non-Embedding Model Parameters (in Billion)")
             param_k = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['k'], label="k")
             param_alpha = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['alpha'], label="alpha")
         plot, output = process_inputs(PARAM_SETS["Stack-V2-Python"]['E'], PARAM_SETS["Stack-V2-Python"]['A'], PARAM_SETS["Stack-V2-Python"]['k'], PARAM_SETS["Stack-V2-Python"]['alpha'], 2.8, 4)
         with gr.Column():
+            submit_btn = gr.Button("Calculate")
             # Output section
             plot_output = gr.Plot(label="Scaling Law Curve", value=plot)
             result_output = gr.Markdown(label="Result", value=output)