nimafathi committed on
Commit
30ca731
·
verified ·
1 Parent(s): ff8d7fb

Upload HDLM model with complete HF integration

Browse files
Files changed (1) hide show
  1. config.json +104 -0
config.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ngpus": 4,
3
+ "type": "aligned",
4
+ "gradient_accumulation_steps": 2,
5
+ "tokenizer": {
6
+ "tokens": 50257,
7
+ "model": "gpt2"
8
+ },
9
+ "training": {
10
+ "batch_size": 128,
11
+ "accum": 2,
12
+ "n_iters": 1250000,
13
+ "snapshot_freq": 10000,
14
+ "log_freq": 500,
15
+ "eval_freq": 10000,
16
+ "snapshot_freq_for_preemption": 3000,
17
+ "snapshot_sampling": true,
18
+ "ema": 0.9999,
19
+ "warmup_iter": -1,
20
+ "loss_type": "hybrid",
21
+ "epsilon": 0.0,
22
+ "lambda": 0.0
23
+ },
24
+ "data": {
25
+ "train": "openwebtext-train",
26
+ "valid": "wikitext103",
27
+ "cache_dir": "/home/toolkit/research-diffcodegen/data",
28
+ "debug": false
29
+ },
30
+ "graph": {
31
+ "type": "absorb",
32
+ "gamma": 1.0,
33
+ "file": "/home/toolkit/research-diffcodegen/data",
34
+ "report_all": false,
35
+ "expanded_sigma": true
36
+ },
37
+ "noise": {
38
+ "type": "loglinear",
39
+ "sigma_min": 0.0001,
40
+ "sigma_max": 2.0,
41
+ "ar_diffusion": false,
42
+ "expanded_sigma": true
43
+ },
44
+ "sampling": {
45
+ "predictor": "analytic",
46
+ "steps_per_level": 1,
47
+ "noise_removal": true,
48
+ "strategy": "direct",
49
+ "strategy_param": 0.9
50
+ },
51
+ "annealing": {
52
+ "type": "none",
53
+ "efficient": false,
54
+ "width": 1024,
55
+ "tau": 1024,
56
+ "eval_tau": 1024,
57
+ "steps_per_level": 1,
58
+ "sampling_method": "sdlm",
59
+ "diffusion_loss_weight": 1.0,
60
+ "ce_loss_weight": 1.0,
61
+ "sampling_eps": 0.0001,
62
+ "attention": {
63
+ "context_type": "block_causal",
64
+ "block_type": "full"
65
+ },
66
+ "match_inference": false
67
+ },
68
+ "eval": {
69
+ "batch_size": 16,
70
+ "perplexity": true,
71
+ "perplexity_batch_size": 8
72
+ },
73
+ "optim": {
74
+ "weight_decay": 0.1,
75
+ "optimizer": "AdamW",
76
+ "lr": 0.0002,
77
+ "beta1": 0.9,
78
+ "beta2": 0.95,
79
+ "eps": 1e-08,
80
+ "warmup": 10000,
81
+ "grad_clip": 1.0,
82
+ "scheduler": "cosine"
83
+ },
84
+ "experiment": {
85
+ "name": "MDLM",
86
+ "wandb_project": "Hybrid-SDLM-ALIGNED"
87
+ },
88
+ "model": {
89
+ "name": "HDLM",
90
+ "type": "ddit",
91
+ "hidden_size": 768,
92
+ "cond_dim": 128,
93
+ "length": 1024,
94
+ "n_blocks": 12,
95
+ "n_heads": 12,
96
+ "dropout": 0.1,
97
+ "scale_by_sigma": false,
98
+ "transformer_sigma_conditioning": false,
99
+ "hybrid_sigma_embedding": false,
100
+ "post_process_logits": false,
101
+ "use_timestep_embedding": false
102
+ },
103
+ "model_type": "epsilon_hybrid"
104
+ }