nimafathi committed on
Commit
0af2e59
·
verified ·
1 Parent(s): 901a865

Upload HDLM model with complete HF integration

Browse files
Files changed (1) hide show
  1. config.yaml +90 -0
config.yaml ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ngpus: 4
2
+ type: aligned
3
+ gradient_accumulation_steps: 2
4
+ tokenizer:
5
+ tokens: 50257
6
+ model: gpt2
7
+ training:
8
+ batch_size: 128
9
+ accum: ${gradient_accumulation_steps}
10
+ n_iters: 1250000
11
+ snapshot_freq: 10000
12
+ log_freq: 500
13
+ eval_freq: 10000
14
+ snapshot_freq_for_preemption: 3000
15
+ snapshot_sampling: true
16
+ ema: 0.9999
17
+ warmup_iter: -1
18
+ loss_type: hybrid
19
+ epsilon: 0.0
20
+ lambda: 0.0
21
+ data:
22
+ train: openwebtext-train
23
+ valid: wikitext103
24
+ cache_dir: /home/toolkit/research-diffcodegen/data
25
+ debug: false
26
+ graph:
27
+ type: absorb
28
+ gamma: 1.0
29
+ file: /home/toolkit/research-diffcodegen/data
30
+ report_all: false
31
+ expanded_sigma: true
32
+ noise:
33
+ type: loglinear
34
+ sigma_min: 0.0001
35
+ sigma_max: 2.0
36
+ ar_diffusion: false
37
+ expanded_sigma: ${graph.expanded_sigma}
38
+ sampling:
39
+ predictor: analytic
40
+ steps_per_level: 1
41
+ noise_removal: true
42
+ strategy: direct
43
+ strategy_param: 0.9
44
+ annealing:
45
+ type: none
46
+ efficient: false
47
+ width: 1024
48
+ tau: 1024
49
+ eval_tau: 1024
50
+ steps_per_level: ${sampling.steps_per_level}
51
+ sampling_method: sdlm
52
+ diffusion_loss_weight: 1.0
53
+ ce_loss_weight: 1.0
54
+ sampling_eps: 0.0001
55
+ attention:
56
+ context_type: block_causal
57
+ block_type: full
58
+ match_inference: false
59
+ eval:
60
+ batch_size: 16
61
+ perplexity: true
62
+ perplexity_batch_size: 8
63
+ optim:
64
+ weight_decay: 0.1
65
+ optimizer: AdamW
66
+ lr: 0.0002
67
+ beta1: 0.9
68
+ beta2: 0.95
69
+ eps: 1.0e-08
70
+ warmup: 10000
71
+ grad_clip: 1.0
72
+ scheduler: cosine
73
+ experiment:
74
+ name: MDLM
75
+ wandb_project: Hybrid-SDLM-ALIGNED
76
+ model:
77
+ name: HDLM
78
+ type: ddit
79
+ hidden_size: 768
80
+ cond_dim: 128
81
+ length: 1024
82
+ n_blocks: 12
83
+ n_heads: 12
84
+ dropout: 0.1
85
+ scale_by_sigma: false
86
+ transformer_sigma_conditioning: false
87
+ hybrid_sigma_embedding: false
88
+ post_process_logits: false
89
+ use_timestep_embedding: false
90
+ model_type: epsilon_hybrid