Spaces:

ParScale
/

Parallel_Scaling_Law

Running

App Files Files Community

chenmouxiang committed on May 16

Commit

7b9ce4e

verified ·

1 Parent(s): 087cecd

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -46

app.py CHANGED Viewed

@@ -1,91 +1,146 @@
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
 # Predefined hyperparameter sets
 PARAM_SETS = {
-    "Set A": {"param1": 0.1, "param2": 0.01, "param3": 100, "param4": 50},
-    "Set B": {"param1": 0.2, "param2": 0.02, "param3": 200, "param4": 100}
 }
-def generate_plot(param1, param2, param3, param4):
-    """Generate visualization based on hyperparameters"""
-    plt.figure(figsize=(10, 6))
-    x = np.linspace(0, 10, int(param3))
-    y = np.sin(x * param1) * np.cos(x * param2) * param4
-    plt.plot(x, y)
-    plt.title(f'Parameter Visualization (p1={param1}, p2={param2}, p3={param3}, p4={param4})')
-    plt.grid(True)
     return plt
-def process_inputs(param_set, custom_param1, custom_param2, custom_param3, custom_param4,
-                  input1, input2):
     """Process inputs and return results"""
-    # Determine which parameter set to use
-    if param_set in PARAM_SETS:
-        params = PARAM_SETS[param_set]
-        p1, p2, p3, p4 = params.values()
-    else:
-        p1, p2, p3, p4 = custom_param1, custom_param2, custom_param3, custom_param4
-    # Generate plot
-    plot = generate_plot(p1, p2, p3, p4)
-    # Calculate result (example calculation)
-    result = (input1 * p1 + input2 * p2) * (p3 + p4)
-    return plot, result
 # Create interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Hyperparameter Calculation and Visualization System")
     with gr.Row():
         with gr.Column():
             # Hyperparameter selection section
             param_set = gr.Dropdown(
                 choices=["Custom"] + list(PARAM_SETS.keys()),
-                value="Custom",
-                label="Select Hyperparameter Set"
             )
             # Custom parameter inputs
-            custom_param1 = gr.Number(value=0.1, label="Parameter 1 (Learning Rate)")
-            custom_param2 = gr.Number(value=0.01, label="Parameter 2 (Weight Decay)")
-            custom_param3 = gr.Number(value=100, label="Parameter 3 (Iterations)")
-            custom_param4 = gr.Number(value=50, label="Parameter 4 (Batch Size)")
-            # Input values
-            input1 = gr.Number(value=1.0, label="Input Value 1")
-            input2 = gr.Number(value=1.0, label="Input Value 2")
-            submit_btn = gr.Button("Calculate")
         with gr.Column():
             # Output section
-            plot_output = gr.Plot(label="Parameter Visualization")
-            result_output = gr.Number(label="Calculation Result")
     # Auto-fill parameters when selecting predefined sets
     def update_params(param_set):
         if param_set in PARAM_SETS:
             params = PARAM_SETS[param_set]
-            return [params["param1"], params["param2"], params["param3"], params["param4"]]
         return [gr.skip(), gr.skip(), gr.skip(), gr.skip()]
     param_set.change(
         update_params,
         inputs=[param_set],
-        outputs=[custom_param1, custom_param2, custom_param3, custom_param4]
     )
     # Submit button event
-    submit_btn.click(
         process_inputs,
-        inputs=[param_set, custom_param1, custom_param2, custom_param3, custom_param4,
-                input1, input2],
         outputs=[plot_output, result_output]
     )
-# Launch application
-demo.launch()

 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
+import math
+from matplotlib.ticker import FuncFormatter
 # Predefined hyperparameter sets
 PARAM_SETS = {
+    "Stack-V2-Python": {"E": 0.69123678, "A": 0.01130616 * 1e9, "k": 0.393463, "alpha": 0.18937067},
+    "Pile": {"E": 1.28254036, "A": 0.2035367 * 1e9, "k": 0.33027934, "alpha": 0.19479807}
 }
+def pred_loss(E, A, k, alpha, n, p):
+    return E + (A / (n * (1 + np.log(p) * k))) ** alpha
+def generate_plot(E, A, k, alpha):
+    plt.clf()
+    colors = ['#2B83BA', '#7BB7D6', '#ED7D5F', '#D7191C']
+    ax = plt.gca()
+    for i, p in enumerate([1, 2, 4, 8]):
+        x_plot = np.linspace(535813376 * 0.9, 4353203200 * 1.1, 100)
+        y_plot = pred_loss(E, A, k, alpha, x_plot, p)
+        ax.plot(x_plot, y_plot, marker=None, markersize=1, linewidth=3, color=colors[int(math.log(p, 2))], label=f"$P={p}$")
+    ax.legend(fontsize=12)
+    # ax.set_xscale("log")
+    # ax.set_yscale("log")
+    def billions(x, pos):
+        if x < 1e9:
+            result = ""
+        else:
+            result = f'{x * 1e-9:.1f}B'
+        return result
+    ax.xaxis.set_major_formatter(FuncFormatter(billions))
+    ax.xaxis.set_minor_formatter(FuncFormatter(billions))
+    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f"{x:.2f}"))
+    ax.yaxis.set_minor_formatter(FuncFormatter(lambda x, pos: f"{x:.2f}"))
+    ax.set_xlim(535813376 * 0.9, 4353203200 * 1.1)
+    ax.set_ylim(ax.get_ylim()[0] * 1, ax.get_ylim()[1] * 1.01)
+    ax.text(0.03, 0.03, f"$E={E}$\n$A={A}$\n$k={k}$\n$\\alpha={alpha}$", transform=ax.transAxes, fontsize=10, verticalalignment='bottom', multialignment='left')
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    ax.set_xlabel('Parameters (Non-Embedding)', fontsize=12)
+    ax.set_ylabel(f'Loss', fontsize=12)
     return plt
+OUTPUT_TEMPLATE = """Loss for a {n}B model when P={p} is: **{loss}**. It is equivalant to:
+- A {n1}B model with P=1;
+- A {n2}B model with P=2;
+- A {n4}B model with P=4;
+- A {n8}B model with P=8;
+Note: The equivalent parameters are for reference only. In some reasoning tasks, scaling the parallel streams will obtain more performance gains than the loss benefits!
+Enjoy it! 😊"""
+def process_inputs(E, A, k, alpha, n, p):
     """Process inputs and return results"""
+    if n < 1000:
+        n = n * 1e9
+    plot = generate_plot(E, A, k, alpha)
+    loss = pred_loss(E, A, k, alpha, n, p)
+    n1 = n * (k * np.log(p) + 1) / (k * np.log(1) + 1) / 1e9
+    n2 = n * (k * np.log(p) + 1) / (k * np.log(2) + 1) / 1e9
+    n4 = n * (k * np.log(p) + 1) / (k * np.log(4) + 1) / 1e9
+    n8 = n * (k * np.log(p) + 1) / (k * np.log(8) + 1) / 1e9
+    return plot, OUTPUT_TEMPLATE.format(n=round(n / 1e9, 2), p=p, n1=round(n1, 2), n2=round(n2, 2), n4=round(n4, 2), n8=round(n8, 2), loss=loss)
 # Create interface
+HEAD = """# Parallel Scaling Law Visualization
+$$
+\\text{Loss}=E+\\left(
+    \\frac{A}{\\text{Parameters}\\times (1+k\\log P)}
+\\right)^{\\alpha}
+$$
+"""
 with gr.Blocks() as demo:
+    gr.Markdown(HEAD)
     with gr.Row():
         with gr.Column():
+            # Input values
+            N = gr.Number(value=2.8, label="N: Number of Non-Embedding Model Parameters (in Billion)")
+            P = gr.Number(value=4, label="P: Number of Parallel Streams")
+            gr.Markdown("---")
             # Hyperparameter selection section
             param_set = gr.Dropdown(
                 choices=["Custom"] + list(PARAM_SETS.keys()),
+                value=list(PARAM_SETS.keys())[0],
+                label="Select our pre-fitted parameters for two datasets"
             )
             # Custom parameter inputs
+            param_E = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['E'], label="E")
+            param_A = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['A'], label="A")
+            param_k = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['k'], label="k")
+            param_alpha = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['alpha'], label="alpha")
+            submit_btn = gr.Button("Estimate Loss and Equivalant Model Parameters")
+        plot, output = process_inputs(PARAM_SETS["Stack-V2-Python"]['E'], PARAM_SETS["Stack-V2-Python"]['A'], PARAM_SETS["Stack-V2-Python"]['k'], PARAM_SETS["Stack-V2-Python"]['alpha'], 2.8, 4)
         with gr.Column():
             # Output section
+            plot_output = gr.Plot(label="Scaling Law Curve", value=plot)
+            result_output = gr.Markdown(label="Result", value=output)
     # Auto-fill parameters when selecting predefined sets
     def update_params(param_set):
         if param_set in PARAM_SETS:
             params = PARAM_SETS[param_set]
+            return [params["E"], params["A"], params["k"], params["alpha"]]
         return [gr.skip(), gr.skip(), gr.skip(), gr.skip()]
     param_set.change(
         update_params,
         inputs=[param_set],
+        outputs=[param_E, param_A, param_k, param_alpha]
     )
     # Submit button event
+    click_event = submit_btn.click(
         process_inputs,
+        inputs=[param_E, param_A, param_k, param_alpha,
+                N, P],
         outputs=[plot_output, result_output]
     )
+demo.launch()