From e7e2ce551cfbb10d71554fd4ed45a96138a20372 Mon Sep 17 00:00:00 2001 From: Yaossg Date: Thu, 18 Jul 2024 21:38:25 +0800 Subject: [PATCH] first commit --- .gitignore | 2 + README.md | 44 ++++ build.sh | 1 + chat.py | 30 +++ gpt.c | 600 +++++++++++++++++++++++++++++++++++++++++++++++++++++ show.gif | Bin 0 -> 56863 bytes 6 files changed, 677 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 build.sh create mode 100644 chat.py create mode 100644 gpt.c create mode 100644 show.gif diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c3d8f61 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +gpt +gpt2_124M.bin \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..98af70c --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# GPT2 + +原作者:Andrej Karpathy @ https://github.com/karpathy/llm.c + +## 背景 + +GPT 很酷,能不能在我自己的电脑上跑一个呢?当然可以! + +![](show.gif) + +现在给你提供 GPT2 的预训练模型:[点击这里](https://alist.yaossg.com/share/model/gpt2_124M.bin),请把该模型放在本仓库代码的同目录下,按照下面的指示即可运行该程序。 + +## 依赖 + +需要安装 GCC 和 Python3 以及下面的 Python 包 + +```bash +pip3 install tiktoken +``` + +## 编译 + +```bash +bash build.sh +``` + +## 运行 + +```bash +python3 chat.py +``` + +## 目标 + +你可能已经发现了,你的程序可能并没有我演示的跑的那么快(~~神机请忽略~~)。 + +你的目标就是优化该程序的性能,在保证结果不变的情况下更快的完成文本的补全。 + +我会使用一些测试点来评测你的程序的正确性和执行时间。期待更高的效率和更多样的优化方案。 + +此外,请在 wp 中回答下面的问题: + +- 什么是阿姆达尔定律?根据阿姆达尔定律,我们应该把优化的重点放在哪里? +- 你的优化方案和思路是什么?优化的效果受到哪些因素影响? 
diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..fd20f0d --- /dev/null +++ b/build.sh @@ -0,0 +1 @@ +gcc gpt.c -lm -O3 -std=gnu11 -ggdb -Wall -Werror -Wno-unused-result -Wno-unused-value -Wno-unused-variable -o gpt diff --git a/chat.py b/chat.py new file mode 100644 index 0000000..c608c8d --- /dev/null +++ b/chat.py @@ -0,0 +1,30 @@ +import tiktoken +import subprocess +import time + +length = input("Completion length: ") +length = str(int(length)) # ensure input a valid integer + +text = input("Text to complete: ") +enc = tiktoken.get_encoding("gpt2") + +tokens = [ + str(tok) for tok in enc.encode(text) +] + +start = time.time() + +proc = subprocess.Popen( + ["./gpt", length, *tokens], + stdout=subprocess.PIPE, + text=True +) + +while (line := proc.stdout.readline()): + token = int(line) + print(enc.decode([token]), end='', flush=True) + +print() + +end = time.time() +print(f"It took {end - start:.2f}s to complete the text.") \ No newline at end of file diff --git a/gpt.c b/gpt.c new file mode 100644 index 0000000..f2fd27e --- /dev/null +++ b/gpt.c @@ -0,0 +1,600 @@ +// Original Author: Andrej Karpathy +// https://github.com/karpathy/llm.c + +#include +#include +#include +#include +#include +#include +#include + +// ---------------------------------------------------------------------------- +// all the individual layers' forward passes +// B = batch_size, T = sequence_length, C = channels, V = vocab_size + +void encoder_forward(float* out, + int* inp, float* wte, float* wpe, + int B, int T, int C) { + // out is (B,T,C). 
At each position (b,t), a C-dimensional vector summarizing token & position + // inp is (B,T) of integers, holding the token ids at each (b,t) position + // wte is (V,C) of token embeddings, short for "weight token embeddings" + // wpe is (maxT,C) of position embeddings, short for "weight positional embedding" + for (int b = 0; b < B; b++) { + for (int t = 0; t < T; t++) { + // seek to the output position in out[b,t,:] + float* out_bt = out + b * T * C + t * C; + // get the index of the token at inp[b, t] + int ix = inp[b * T + t]; + // seek to the position in wte corresponding to the token + float* wte_ix = wte + ix * C; + // seek to the position in wpe corresponding to the position + float* wpe_t = wpe + t * C; + // add the two vectors and store the result in out[b,t,:] + for (int i = 0; i < C; i++) { + out_bt[i] = wte_ix[i] + wpe_t[i]; + } + } + } +} + +void layernorm_forward(float* out, float* mean, float* rstd, + float* inp, float* weight, float* bias, + int B, int T, int C) { + // reference: https://pytorch.org/docs/stable/generated/torch.nn.LayerNorm.html + // both inp and out are (B,T,C) of the activations + // mean and rstd are (B,T) buffers, to be used later in backward pass + // at each position (b,t) of the input, the C-dimensional vector + // of activations gets normalized, then scaled and shifted + float eps = 1e-5f; + for (int b = 0; b < B; b++) { + for (int t = 0; t < T; t++) { + // seek to the input position inp[b,t,:] + float* x = inp + b * T * C + t * C; + // calculate the mean + float m = 0.0f; + for (int i = 0; i < C; i++) { + m += x[i]; + } + m = m/C; + // calculate the variance (without any bias correction) + float v = 0.0f; + for (int i = 0; i < C; i++) { + float xshift = x[i] - m; + v += xshift * xshift; + } + v = v/C; + // calculate the rstd (reciprocal standard deviation) + float s = 1.0f / sqrtf(v + eps); + // seek to the output position in out[b,t,:] + float* out_bt = out + b * T * C + t * C; + for (int i = 0; i < C; i++) { + float n = 
(s * (x[i] - m)); // normalize + float o = n * weight[i] + bias[i]; // scale and shift + out_bt[i] = o; // write + } + // cache the mean and rstd for the backward pass later + mean[b * T + t] = m; + rstd[b * T + t] = s; + } + } +} + +void matmul_forward(float* out, + float* inp, float* weight, float* bias, + int B, int T, int C, int OC) { + // most of the running time is spent here and in matmul_backward + // OC is short for "output channels" + // inp is (B,T,C), weight is (OC, C), bias is (OC) + // out will be (B,T,OC) + for (int b = 0; b < B; b++) { + for (int t = 0; t < T; t++) { + float* out_bt = out + b * T * OC + t * OC; + float* inp_bt = inp + b * T * C + t * C; + for (int o = 0; o < OC; o++) { + float val = (bias != NULL) ? bias[o] : 0.0f; + float* wrow = weight + o*C; + for (int i = 0; i < C; i++) { + val += inp_bt[i] * wrow[i]; + } + out_bt[o] = val; + } + } + } +} + +void attention_forward(float* out, float* preatt, float* att, + float* inp, + int B, int T, int C, int NH) { + // input is (B, T, 3C) holding the query, key, value (Q, K, V) vectors + // preatt, att are (B, NH, T, T). 
NH = number of heads, T = sequence length + // that holds the pre-attention and post-attention scores (used in backward) + // output is (B, T, C) + // attention is the only layer that mixes information across time + // every other operation is applied at every (b,t) position independently + // (and of course, no layer mixes information across batch) + int C3 = C*3; + int hs = C / NH; // head size + float scale = 1.0 / sqrtf(hs); + + for (int b = 0; b < B; b++) { + for (int t = 0; t < T; t++) { + for (int h = 0; h < NH; h++) { + float* query_t = inp + b * T * C3 + t * C3 + h * hs; + float* preatt_bth = preatt + b*NH*T*T + h*T*T + t*T; + float* att_bth = att + b*NH*T*T + h*T*T + t*T; + + // pass 1: calculate query dot key and maxval + float maxval = -10000.0f; // TODO something better + for (int t2 = 0; t2 <= t; t2++) { + float* key_t2 = inp + b * T * C3 + t2 * C3 + h * hs + C; // +C because it's key + + // (query_t) dot (key_t2) + float val = 0.0f; + for (int i = 0; i < hs; i++) { + val += query_t[i] * key_t2[i]; + } + val *= scale; + if (val > maxval) { + maxval = val; + } + + preatt_bth[t2] = val; + } + + // pass 2: calculate the exp and keep track of sum + // maxval is being calculated and subtracted only for numerical stability + float expsum = 0.0f; + for (int t2 = 0; t2 <= t; t2++) { + float expv = expf(preatt_bth[t2] - maxval); + expsum += expv; + att_bth[t2] = expv; + } + float expsum_inv = expsum == 0.0f ? 0.0f : 1.0f / expsum; + + // pass 3: normalize to get the softmax + for (int t2 = 0; t2 < T; t2++) { + if (t2 <= t) { + att_bth[t2] *= expsum_inv; + } else { + // causal attention mask. 
not strictly necessary to set to zero here + // only doing this explicitly for debugging and checking to PyTorch + att_bth[t2] = 0.0f; + } + } + + // pass 4: accumulate weighted values into the output of attention + float* out_bth = out + b * T * C + t * C + h * hs; + for (int i = 0; i < hs; i++) { out_bth[i] = 0.0f; } + for (int t2 = 0; t2 <= t; t2++) { + float* value_t2 = inp + b * T * C3 + t2 * C3 + h * hs + C*2; // +C*2 because it's value + float att_btht2 = att_bth[t2]; + for (int i = 0; i < hs; i++) { + out_bth[i] += att_btht2 * value_t2[i]; + } + } + } + } + } +} + +#define GELU_SCALING_FACTOR sqrtf(2.0f / M_PI) +void gelu_forward(float* out, float* inp, int N) { + // (approximate) GeLU elementwise non-linearity in the MLP block of Transformer + for (int i = 0; i < N; i++) { + float x = inp[i]; + float cube = 0.044715f * x * x * x; + out[i] = 0.5f * x * (1.0f + tanhf(GELU_SCALING_FACTOR * (x + cube))); + } +} + +void residual_forward(float* out, float* inp1, float* inp2, int N) { + for (int i = 0; i < N; i++) { + out[i] = inp1[i] + inp2[i]; + } +} + +void softmax_forward(float* probs, float* logits, int B, int T, int V) { + // output: probs are (B,T,V) of the probabilities (sums to 1.0 in each b,t position) + // input: logits is (B,T,V) of the unnormalized log probabilities + for (int b = 0; b < B; b++) { + for (int t = 0; t < T; t++) { + // probs <- softmax(logits) + float* logits_bt = logits + b * T * V + t * V; + float* probs_bt = probs + b * T * V + t * V; + + // maxval is only calculated and subtracted for numerical stability + float maxval = -10000.0f; // TODO something better + for (int i = 0; i < V; i++) { + if (logits_bt[i] > maxval) { + maxval = logits_bt[i]; + } + } + float sum = 0.0f; + for (int i = 0; i < V; i++) { + probs_bt[i] = expf(logits_bt[i] - maxval); + sum += probs_bt[i]; + } + for (int i = 0; i < V; i++) { + probs_bt[i] /= sum; + } + } + } +} + +// ---------------------------------------------------------------------------- +// GPT-2 
model definition + +// the parameters of the model +#define NUM_PARAMETER_TENSORS 16 +typedef struct { + float* wte; // (V, C) + float* wpe; // (maxT, C) + float* ln1w; // (L, C) + float* ln1b; // (L, C) + float* qkvw; // (L, 3*C, C) + float* qkvb; // (L, 3*C) + float* attprojw; // (L, C, C) + float* attprojb; // (L, C) + float* ln2w; // (L, C) + float* ln2b; // (L, C) + float* fcw; // (L, 4*C, C) + float* fcb; // (L, 4*C) + float* fcprojw; // (L, C, 4*C) + float* fcprojb; // (L, C) + float* lnfw; // (C) + float* lnfb; // (C) +} ParameterTensors; + +// allocate memory for the parameters and point the individual tensors to the right places +float* malloc_and_point_parameters(ParameterTensors* params, size_t* param_sizes) { + size_t num_parameters = 0; + for (size_t i = 0; i < NUM_PARAMETER_TENSORS; i++) { + num_parameters += param_sizes[i]; + } + // malloc all parameters all at once + float* params_memory = (float*)malloc(num_parameters * sizeof(float)); + // assign all the tensors + float** ptrs[] = { + ¶ms->wte, ¶ms->wpe, ¶ms->ln1w, ¶ms->ln1b, ¶ms->qkvw, ¶ms->qkvb, + ¶ms->attprojw, ¶ms->attprojb, ¶ms->ln2w, ¶ms->ln2b, ¶ms->fcw, ¶ms->fcb, + ¶ms->fcprojw, ¶ms->fcprojb, ¶ms->lnfw, ¶ms->lnfb + }; + float* params_memory_iterator = params_memory; + for (size_t i = 0; i < NUM_PARAMETER_TENSORS; i++) { + *(ptrs[i]) = params_memory_iterator; + params_memory_iterator += param_sizes[i]; + } + return params_memory; +} + +#define NUM_ACTIVATION_TENSORS 23 +typedef struct { + float* encoded; // (B, T, C) + float* ln1; // (L, B, T, C) + float* ln1_mean; // (L, B, T) + float* ln1_rstd; // (L, B, T) + float* qkv; // (L, B, T, 3*C) + float* atty; // (L, B, T, C) + float* preatt; // (L, B, NH, T, T) + float* att; // (L, B, NH, T, T) + float* attproj; // (L, B, T, C) + float* residual2; // (L, B, T, C) + float* ln2; // (L, B, T, C) + float* ln2_mean; // (L, B, T) + float* ln2_rstd; // (L, B, T) + float* fch; // (L, B, T, 4*C) + float* fch_gelu; // (L, B, T, 4*C) + float* fcproj; // 
(L, B, T, C) + float* residual3; // (L, B, T, C) + float* lnf; // (B, T, C) + float* lnf_mean; // (B, T) + float* lnf_rstd; // (B, T) + float* logits; // (B, T, V) + float* probs; // (B, T, V) + float* losses; // (B, T) +} ActivationTensors; + +float* malloc_and_point_activations(ActivationTensors* acts, size_t* act_sizes) { + size_t num_activations = 0; + for (size_t i = 0; i < NUM_ACTIVATION_TENSORS; i++) { + num_activations += act_sizes[i]; + } + float* acts_memory = (float*)malloc(num_activations * sizeof(float)); + float** ptrs[] = { + &acts->encoded, &acts->ln1, &acts->ln1_mean, &acts->ln1_rstd, &acts->qkv, &acts->atty, + &acts->preatt, &acts->att, &acts->attproj, &acts->residual2, &acts->ln2, &acts->ln2_mean, + &acts->ln2_rstd, &acts->fch, &acts->fch_gelu, &acts->fcproj, &acts->residual3, &acts->lnf, + &acts->lnf_mean, &acts->lnf_rstd, &acts->logits, &acts->probs, &acts->losses + }; + float* acts_memory_iterator = acts_memory; + for (size_t i = 0; i < NUM_ACTIVATION_TENSORS; i++) { + *(ptrs[i]) = acts_memory_iterator; + acts_memory_iterator += act_sizes[i]; + } + return acts_memory; +} + +typedef struct { + int max_seq_len; // max sequence length, e.g. 1024 + int vocab_size; // vocab size, e.g. 50257 + int num_layers; // number of layers, e.g. 12 + int num_heads; // number of heads in attention, e.g. 12 + int channels; // number of channels, e.g. 
768 +} GPT2Config; + +typedef struct { + GPT2Config config; + // the weights (parameters) of the model, and their sizes + ParameterTensors params; + size_t param_sizes[NUM_PARAMETER_TENSORS]; + float* params_memory; + int num_parameters; + // gradients of the weights + ParameterTensors grads; + float* grads_memory; + // buffers for the AdamW optimizer + float* m_memory; + float* v_memory; + // the activations of the model, and their sizes + ActivationTensors acts; + size_t act_sizes[NUM_ACTIVATION_TENSORS]; + float* acts_memory; + int num_activations; + // gradients of the activations + ActivationTensors grads_acts; + float* grads_acts_memory; + // other run state configuration + int batch_size; // the batch size (B) of current forward pass + int seq_len; // the sequence length (T) of current forward pass + int* inputs; // the input tokens for the current forward pass + int* targets; // the target tokens for the current forward pass + float mean_loss; // after a forward pass with targets, will be populated with the mean loss +} GPT2; + +void gpt2_build_from_checkpoint(GPT2 *model, char* checkpoint_path) { + + // read in model from a checkpoint file + FILE *model_file = fopen(checkpoint_path, "rb"); + if (model_file == NULL) { printf("Error opening model file\n"); exit(1); } + int model_header[256]; + fread(model_header, sizeof(int), 256, model_file); + if (model_header[0] != 20240326) { printf("Bad magic model file"); exit(1); } + if (model_header[1] != 1) { printf("Bad version in model file"); exit(1); } + + // read in hyperparameters + int maxT, V, L, NH, C; + model->config.max_seq_len = maxT = model_header[2]; + model->config.vocab_size = V = model_header[3]; + model->config.num_layers = L = model_header[4]; + model->config.num_heads = NH = model_header[5]; + model->config.channels = C = model_header[6]; + + // allocate space for all the parameters and read them in + model->param_sizes[0] = V * C; // wte + model->param_sizes[1] = maxT * C; // wpe + 
model->param_sizes[2] = L * C; // ln1w + model->param_sizes[3] = L * C; // ln1b + model->param_sizes[4] = L * (3 * C) * C; // qkvw + model->param_sizes[5] = L * (3 * C); // qkvb + model->param_sizes[6] = L * C * C; // attprojw + model->param_sizes[7] = L * C; // attprojb + model->param_sizes[8] = L * C; // ln2w + model->param_sizes[9] = L * C; // ln2b + model->param_sizes[10] = L * (4 * C) * C; // fcw + model->param_sizes[11] = L * (4 * C); // fcb + model->param_sizes[12] = L * C * (4 * C); // fcprojw + model->param_sizes[13] = L * C; // fcprojb + model->param_sizes[14] = C; // lnfw + model->param_sizes[15] = C; // lnfb + + // count the number of parameters + size_t num_parameters = 0; + for (size_t i = 0; i < NUM_PARAMETER_TENSORS; i++) { + num_parameters += model->param_sizes[i]; + } + model->num_parameters = num_parameters; + + // read in all the parameters from file + model->params_memory = malloc_and_point_parameters(&model->params, model->param_sizes); + fread(model->params_memory, sizeof(float), num_parameters, model_file); + fclose(model_file); + + // other inits + model->acts_memory = NULL; + model->grads_memory = NULL; + model->m_memory = NULL; + model->v_memory = NULL; + model->grads_acts_memory = NULL; + model->inputs = NULL; + model->targets = NULL; + model->batch_size = 0; + model->seq_len = 0; + model->mean_loss = -1.0f; // -1.0f will designate no loss +} + +void gpt2_forward(GPT2 *model, int* inputs, int B, int T) { + // convenience parameters + int V = model->config.vocab_size; + int L = model->config.num_layers; + int NH = model->config.num_heads; + int C = model->config.channels; + + // record the current B,T as well + model->batch_size = B; + model->seq_len = T; + // and now allocate the space + model->act_sizes[0] = B * T * C; // encoded + model->act_sizes[1] = L * B * T * C; // ln1 + model->act_sizes[2] = L * B * T; // ln1_mean + model->act_sizes[3] = L * B * T; // ln1_rstd + model->act_sizes[4] = L * B * T * 3*C; // qkv + model->act_sizes[5] 
= L * B * T * C; // atty + model->act_sizes[6] = L * B * NH * T * T; // preatt + model->act_sizes[7] = L * B * NH * T * T; // att + model->act_sizes[8] = L * B * T * C; // attproj + model->act_sizes[9] = L * B * T * C; // residual2 + model->act_sizes[10] = L * B * T * C; // ln2 + model->act_sizes[11] = L * B * T; // ln2_mean + model->act_sizes[12] = L * B * T; // ln2_rstd + model->act_sizes[13] = L * B * T * 4*C; // fch + model->act_sizes[14] = L * B * T * 4*C; // fch_gelu + model->act_sizes[15] = L * B * T * C; // fcproj + model->act_sizes[16] = L * B * T * C; // residual3 + model->act_sizes[17] = B * T * C; // lnf + model->act_sizes[18] = B * T; // lnf_mean + model->act_sizes[19] = B * T; // lnf_rstd + model->act_sizes[20] = B * T * V; // logits + model->act_sizes[21] = B * T * V; // probs + model->act_sizes[22] = B * T; // losses + size_t num_activations = 0; + for (size_t i = 0; i < NUM_ACTIVATION_TENSORS; i++) { + num_activations += model->act_sizes[i]; + } + model->num_activations = num_activations; + + if (model->acts_memory) { + free(model->acts_memory); + model->acts_memory = NULL; + } + model->acts_memory = malloc_and_point_activations(&model->acts, model->act_sizes); + + // also create memory for caching inputs and targets + if (model->inputs) { + free(model->inputs); + } + model->inputs = (int*)malloc(B * T * sizeof(int)); + + // cache the inputs/targets + memcpy(model->inputs, inputs, B * T * sizeof(int)); + + // forward pass + ParameterTensors params = model->params; // for brevity + ActivationTensors acts = model->acts; + float* residual; + encoder_forward(acts.encoded, inputs, params.wte, params.wpe, B, T, C); // encoding goes into residual[0] + for (int l = 0; l < L; l++) { + + residual = l == 0 ? 
acts.encoded : acts.residual3 + (l-1) * B * T * C; + + // get the pointers of the weights for this layer + float* l_ln1w = params.ln1w + l * C; + float* l_ln1b = params.ln1b + l * C; + float* l_qkvw = params.qkvw + l * 3*C * C; + float* l_qkvb = params.qkvb + l * 3*C; + float* l_attprojw = params.attprojw + l * C * C; + float* l_attprojb = params.attprojb + l * C; + float* l_ln2w = params.ln2w + l * C; + float* l_ln2b = params.ln2b + l * C; + float* l_fcw = params.fcw + l * 4*C * C; + float* l_fcb = params.fcb + l * 4*C; + float* l_fcprojw = params.fcprojw + l * C * 4*C; + float* l_fcprojb = params.fcprojb + l * C; + + // get the pointers of the activations for this layer + float* l_ln1 = acts.ln1 + l * B * T * C; + float* l_ln1_mean = acts.ln1_mean + l * B * T; + float* l_ln1_rstd = acts.ln1_rstd + l * B * T; + float* l_qkv = acts.qkv + l * B * T * 3*C; + float* l_atty = acts.atty + l * B * T * C; + float* l_preatt = acts.preatt + l * B * NH * T * T; + float* l_att = acts.att + l * B * NH * T * T; + float* l_attproj = acts.attproj + l * B * T * C; + float* l_residual2 = acts.residual2 + l * B * T * C; + float* l_ln2 = acts.ln2 + l * B * T * C; + float* l_ln2_mean = acts.ln2_mean + l * B * T; + float* l_ln2_rstd = acts.ln2_rstd + l * B * T; + float* l_fch = acts.fch + l * B * T * 4*C; + float* l_fch_gelu = acts.fch_gelu + l * B * T * 4*C; + float* l_fcproj = acts.fcproj + l * B * T * C; + float* l_residual3 = acts.residual3 + l * B * T * C; + + // now do the forward pass + layernorm_forward(l_ln1, l_ln1_mean, l_ln1_rstd, residual, l_ln1w, l_ln1b, B, T, C); + matmul_forward(l_qkv, l_ln1, l_qkvw, l_qkvb, B, T, C, 3*C); + attention_forward(l_atty, l_preatt, l_att, l_qkv, B, T, C, NH); + matmul_forward(l_attproj, l_atty, l_attprojw, l_attprojb, B, T, C, C); + residual_forward(l_residual2, residual, l_attproj, B*T*C); + layernorm_forward(l_ln2, l_ln2_mean, l_ln2_rstd, l_residual2, l_ln2w, l_ln2b, B, T, C); + matmul_forward(l_fch, l_ln2, l_fcw, l_fcb, B, T, C, 4*C); + 
gelu_forward(l_fch_gelu, l_fch, B*T*4*C); + matmul_forward(l_fcproj, l_fch_gelu, l_fcprojw, l_fcprojb, B, T, 4*C, C); + residual_forward(l_residual3, l_residual2, l_fcproj, B*T*C); + } + residual = acts.residual3 + (L-1) * B * T * C; // last residual is in residual3 + layernorm_forward(acts.lnf, acts.lnf_mean, acts.lnf_rstd, residual, params.lnfw, params.lnfb, B, T, C); + matmul_forward(acts.logits, acts.lnf, params.wte, NULL, B, T, C, V); + softmax_forward(acts.probs, acts.logits, B, T, V); +} + +void gpt2_zero_grad(GPT2 *model) { + if(model->grads_memory != NULL) { memset(model->grads_memory, 0, model->num_parameters * sizeof(float)); } + if(model->grads_acts_memory != NULL) { memset(model->grads_acts_memory, 0, model->num_activations * sizeof(float)); } +} + +void gpt2_free(GPT2 *model) { + free(model->params_memory); + free(model->grads_memory); + free(model->m_memory); + free(model->v_memory); + free(model->acts_memory); + free(model->grads_acts_memory); + free(model->inputs); + free(model->targets); +} + +int sample_mult(float* probabilities, int n) { + // sample index from probabilities (they must sum to 1!) 
+ // coin can be a random number in [0, 1), usually from random_f32() + float cdf = 0.0f, coin = 0.5f; + for (int i = 0; i < n; i++) { + cdf += probabilities[i]; + if (coin < cdf) { + return i; + } + } + return n - 1; // in case of rounding errors +} + +// the GPT-2 end-of-text token id +#define GPT2_EOT 50256 + +int main(int argc, char* argv[]) { + GPT2 model; + gpt2_build_from_checkpoint(&model, "gpt2_124M.bin"); + + if (argc < 3) { + printf("Provide completion length and at least one token.\n"); + exit(1); + } + + const int input_offset = 2; + int completion_length = atoi(argv[1]); + int input_length = argc - input_offset; + if (input_length > completion_length) { + printf("Tow many tokens.\n"); + exit(1); + } + + int tokens[completion_length]; + + for (int i = 0; i < completion_length; i++) { + if (i < input_length) { + tokens[i] = atoi(argv[input_offset + i]); + } else { + tokens[i] = GPT2_EOT; + } + } + + for (int t = input_length; t < completion_length; t++) { + gpt2_forward(&model, tokens, 1, t); + float* probs = model.acts.probs + (t-1) * model.config.vocab_size; + int next_token = sample_mult(probs, model.config.vocab_size); + tokens[t] = next_token; + + printf("%d\n", tokens[t]); + fflush(stdout); + } + + gpt2_free(&model); +} diff --git a/show.gif b/show.gif new file mode 100644 index 0000000000000000000000000000000000000000..ff800f3b0fbb845d4d5bbe16100335b4ca5dc1df GIT binary patch literal 56863 zcmeEvbzGNs_qTzHjZREZlnxaEl~AOVMx;?dy1PL|LFw-9?r!Ps?vM@>u?wDaU0*cQG$*LnuU!Mb z^$rV*4E|09f2~3OSS%S89Rw*QEhfm8f3fmAOTVBvMM^TM++`e=Bj^&*@mX?-QckkZ4ckiCHwY80njjgS%ot>S%y@R8plarH+vx}>X ztGlb4hnu^nyN9>Cr?-cvkEfTfrFDh3 z?C$I8>F?TOXzJ2@d-Mjbi-+%b<;p4}TpFVwBSXfwG zT*O{j6OOIfw(d^ZUF=^dq-xW`D{OHHd4jmoBdQ$t)2a{Fx`&mz60keqac4x;d6Eey zNY!OTH~3O3)CX~A#xw=e8$2AW%be!hM@7E{pC>D>Et1QVB3ZTEwk1|Dk|&raJE1F4 zB2#@Rq5b04O}rHj_`EqueVH5avvkX{lLvBj$LoW6b5o>#q1qBQ)R3F{phRJ78Z~R4 z)o6v?FO)KkdFkUdZj?A7eEAuZ4Suv5!;Sfw(=B1_j)(aRvSvHtgcD_&3bG&d+*{ia 
z!e5y4WFXJr(Qs2??z0D_cefr9xS#iYyw;QQdh`AK7t^hgyrBX`1+O0UW@ogBsuG=~V&9~nq7TEUV;jfaaDUC5S!GML`+iZxI^Am1!LOC%xLr@hSWbuD*{Cja zqV4T`WSAXELY&b)<+jMfj+ZEAqpEJ@5k`3_C(Op&Uc1hYb>A^bm(U;|^@s6XRqG;8 z-t>7vl(3LiO+k6_PUo>{Z^y=jM=AFyh?G-HH{N)dTE169Gph1%;^U0QGmjo;wqBxq zlGQ1u@g%!fIq^x(pz)(8xg$1|PxB^xG@j+Hc`!$V(rtME5jyDnk(OS_VHW=4ky*~%01$m&#U&UBt5UjGkN^HhQO9; zzLv;WbH0u&Hfg?|D);ex19c76i$=O0%@ps=kBQTSd;7HCj@pr?yd85o`}FO&8|R646JFxl?4#^f zn@)b5tK6sa@p&z2>c{!Uv(G-hXyrWl>1C(5&Zk$sDyg4d51Kst^k&5N))K(mRKWDqZvMmC~maFpE!RK~dd=a~SYb!|@pW6#MMeK>?s-n31 z+(Du-VsCD3)d}m5O zYHOI^KX<*eEs9_hcP-n&c{e-0sAK!;YA-R)ySq6>5s`A&@m!tv@N0}BIa^mJU_S2| zwk?{Ble=ChVBRZ^FPcKUu3oHQ-aE}Hno5Pc;cEZ9PhMm636r`8nfLR)rQ2etZMhre z55DlL<%^;5t!q?feBs~f6hjxw-K2i?ML=(3%$eM}CT;T1()~^#fi6&G5xC z_S7{S7rY3bbBaAT&D~<&|03kAd}A#0o4OW@_b)=%Y>#8v#M5eZ@MRb_e;nJs`c@mp zm*F_haU7&PZ4OsoM(l5jyL7g`&Bgp>B*FH0E>50ykARm^Wc=|w;`Qx51uvtio#Xjb zcsc_5U&hck#S56!cZ577AwW8gwPb4+*HGcD*|BErntf|xq4QX~cI~=#>(;MdkA;P` zVZ(-v8#iv+vaNyv`0$Y*9w5fTy-78Vu}5fK#?6%!K^7Z;b1kdTy=yn6NOwQJX; zq@<*!rDbGfu3x_{D=RA}Cnqm2ub`lysHmuC#4Z(v|xXlQ6;WMph?jPw|$&}EpJo15RbapNX58@Fy*SXkV? 
zeftik->~{-$6@c_08IzdbvQdaySTWxy1IIKc|}Ktr>7>C6hS7Gv@}$$4HJ6krR?o5tBi)PE7)hJ0;E6-Tb$@d}#eY2Qn4Vek+h^VI|0^=*W-L zQ5>hEJaL-xwzRZu$>49|QmR8V^LxTLeBgdqNqm#1} zk{(EYxVgExySsaMczAkxdU<(!d;9qK_`>Js=jZS59}o}_7#I{36dW8J5)u*`iuU4Y zHy#P!Z+GOY`*EZlhYvdPtgI|(%5!pZbJ51Upz!|vqSE5x($dnh^74wx%F62M>e||x zhWh%Z#)jsmhSui#_Lhc@)`rfuhOV}ieEm8(meS=n^P{n+qp=6F0ZE9K-tM-(o{qlW zp8o#6{(-@Pp$CJ*V?z(dhepOn$0nzzXZ}n_j^u~Y8nusyxY9;0Xs+9SkRwi4uH0t* z0XC<%I1!ch`;NE!8SK~-CO2JM#>VCznPN~P5J6@36fe?Jq&ELt-t)mUE1%}k2(=8^ z%lFQ2UPv+5P88|pBb*wWn4_7`X3QfY!r z92giH92y-O9)1j;)ES*51-+kf1z zKeEeU%Ej*7v1UAPOD7hgkDD5TqAkyhI(+1`87~c>v0eC}q`iY6m}znH9He3~lj`J| zl$QlABfaEucCKzDp^JnjQbt$39KWff%Q6y@ZVtg5V{uA-`;rmhXkDGhCXEgeH`T_bHhV;y}H9eqnm7Z{p{)rDU_PuO3wvK_Gvpd`}>V; z`F_;nmu%Ocmx)qHiQ@L$)i5{qs!1x%o08DDv-T7bzuO|mSf?aTBm?j1#c+MKbQLy= z<0Z*>IakIiuO4r@wcc>LbhlasvKlrwaA)+=cVZumy(64yL+G z->;8i@3i$yv@v<7Z>EC@12aqlLJsO0LK>Rs8<`mx zn;V*#8=KxRHM?nk;}&d>VVr|m&dTbm`iQQMkwMPM!ONSDot5CLm^Y92XlK z9~+kt7oQN1td5c0F%(Bw8D~Iqnw^`So1dFkm{(AoUszIbzpSvR98wRB%KOD=lvEZ$ zlvWm(Rh5=k!8Ww&Mgzo&1o zw|}S)vTtx?U}zK?R5XwT#Kh2piQ$on2V;|?<5T04Gq6mao_+KXNk!NqBP(QBAAf&$ zj3l7iQUaD}U^>;K+Zks}PoH~BJ;relhxkmTs-D6{=PeBL@`~Gn&x&AOG(S|m6`#>- z&(R14I-H1HOFUa#L;Dxw0*2>l7q<255w!SST*Sv^*h9`0%|b4~=dVjq5igW3lYJ29 zb9c)3Z}$D%9z{W3K}kVTNfBA2{Ky^!vqt&O9%ZRJU6~WVdAV7c6VWxwS92n)RDNep z3<(Vn4U6~-=EOYYfKyOdv}#T)N9V+!>Btzgr<0R^MR#iUt@5`rCz4=YYz$(MzJpD3Z}LjdNmnN_kLO&<(65e>S<#pz zhS3`s>}YRQ+9ARkLcVMMB{xo7v;XPM!;SX3(fK}$GS!{$TqkNgr-Xz)ZQ7EU610cZ z1~)KWPSRRs-$ty@*(mXEHvU{T{@_;g2e&pWTg{cF=5N(PWU0Aa3%|-n*xGzf$5qYf zceXaaSqp!?wpq2cLDn|;kc|bfwJ9ksE{II203(nFI3vXnOuGN4tAM9I`$V~xr zP=I@a;z}WeQurGOh0phdFV-}$F-0#5s}2g7h3OrO+m=uWEiGYNdhhN%8*3Xo8#@PE zdnY>wXFCTMdq-C^+#H-B+#Q|Wom@PeT|HggJYC(q+&lotfS?!vzu@DGgr9$)e?Slh zxex@%g^)0)f}!D&Vc}8X5z!HmF;UU6(J@E^8XKP&7oQZLkera1l8}^|n4FrFl9rO1 zo|2l8nwF84o|Tc6o0*-Tom-fbcRx43D8HZ>=0IfpE2}E2s4lOpt*EZ6tf{Z6X{fGk ztgUaVYiO=-Xl`h1X>4j~YHn$6Zh^0@wYj~uxudPAv#s$P{pgo%Xm>|rcSloqM{{>q zdv|wNS9fn$PhWTMKu;fZq7V88M=+z{!0QkBeT| 
zMGN)biID|Pl4~b3_Gj)Di`$;a*2uUog)JW^b;GLCsQVAf&(8;q|G=2|-#+;L$M!T| zl}4mh&CkDozp!NW8nwEjzP75dzPh=wrlskx-P2$W8cS=`f!>b6{;r{co(Dtyqa#D( zW22K3&?p&YHwxs`>o4_ z_xCOn$c+%HUii;kCQ!&8<}!h~5a#CO!es*8#UPi7qM{<`HcKnY%c?3XYN{*i>R|SR z%f!mUrM~0OuRdV~)7jk9*#e6g^hWr_?1x+?{=(T$`&%XN^XwONmp6#PAa6?Wqsr`f znsfy-4I|fG2OcY*JMMCu>+z`Wa*~bFY~mSj4%xn|doa3-Li7~TPlF)@)L8K|BPCXT|%vYhOMul)|XH7OAr~%1u(y$sGty?{EEv; zO3MHxgDyWS>uRgfIzRMa3~qmbUH=PC^CsWzDYx7= zdK#1`?X-o4BU{5a&7}}q`6K6K4+F~{Tr0*+6eN+-_~+ZB>=MRPJxDbUN42?cEAwaD zWqgjtjZe^l{z5^JnZW)7_c&$k6R*-v2YGKPV+`8wnIz%9R0+{LeAxeTF8+ImUk0pw zbM?8Bi_3_$&z10d6q2uU@!vcA-+G5n`d?1KAGpK+NHP4ML{$GDTd-9A-3u1D1OkNm z%LU8uU%6o6`ED`%dcm@@?#>6JsF90;Q;kh}{`yM1R!6yg8o6H(T@G43>E-<+)9YVA z_P>DapP@)1_l5r=Ap0PK_^kX#X5znqY$Ox^aHok-mH!1~m#?Nq{O1ALr;+r;g7lnR zbK*}h`+(KA7x;WY<~xLshYu{Kqeln`362pTB_usYL`FzVPDDaUL<)?i6Q}|KHAoSV zB7y`MY7-3^CBzb2iJXddnU!>!0$52WPQ$22MST`f*C%KgPSP+?(=yVWK1X|&`8557 zGmNZfnb_#hu``_KU}V0?bm7uD7S8jmToBA`+!xq+SlD@3Ie6JF^09OBb8rb<;uhrO zy~52W%)>9tDmAIW;RiGe0M zOUi0WD{4zC>dGqX%B$)jDykbQYZ@_8Rok>2&DC`fEj9HmwWypxT_b1-bf5|Wovm%1 zZS5Ux9bF&~(B28r*WT6N(LD&!*)!DDJKWR%pr?PNcVM({5Mr!62&AkwUq!W)WrfeWfKZqq|{AOCUXx(`N65 zB`tyV>=B5T0IwItbyQ2B-0uu%38?KRZ3$($d+8-Pq9wrLN+BIUOWqMHD9-52ldWD8 zFP5o}Uyz+BZZA>d@adp&QeTo%OM-QPaq_h&o$&~7-m~fh1*U4lO%1t|!^L-QY&<;Z zl{T8xxPwyL-!xsp)7hXn@Z3Aaa0zP}p%f~^mO=?4!pp4Qn)(@{>=M;io6O|>&&T5m z+&KGW@}WesQn8D4EA5QCb3@~TQ((@N$v(Yjl6D_*8wYb996vk5^s?k==9R5Mb&W?X+vLIDUxvjaSjMlvtwgGc7DV8EBg5l(OapXoRsR z4v<-UopCna(DY{KZBL`KZYqIVmjv}ZE%kA%7j|WZ5$=efalc1!)pV38%#AD19t)RV zl1gbLM%L!w@P0?fj#3FX-TQOhg7Uk!WYs&}$D(glOiuINM!{|V)>Y@(oDkm0jZG#* zdiYyJ6sR&K%?`1~C5Z)iVQC%U^>}6)tKePr?yZ{-?Ot0FZ!v+XNZqU|k8Oc6DSM~1 zn%|w_4|!Hgjzi|@e3r;&i}xwX1M8&a@$QUf*QipS&B-6q)IMP!#A=P_H*%qzpHT7U z=2H$az6+jW=CfX&v(u7VyNbjqtYmiVKV*~BtS(ELt|f_+c#MLZGUe1^3L^GKmi~uK zK|AOM44oGuXK#fNv%5uiIheP)hc6gJMb=u>%CiRUHhkhN;Sl&JZi9QE@~x06$N21L zf(7`7v*o;k?l`;NHD?}5VZzbYRxW&^GdsKgXwl|ziBOicnTd1dhZZZ+D(Bf=9=1Io zurEF5CEKRCsyaqn8czF3UmWdJ3#WPgop+O76mJ~O*+Ry$=@vybRpN()8vKEqG-aEd 
zKQ_0puD|uLom}{%ez{A^3{A%lV&kadjoA$+bNsEU$tpKl@II-;K4NCr@=PkjE%Bi3 zhnB)DZ7rh(cF*h~J)Jkto#tDGlc;iUnOA&l*^`+~^KvZhH6>-{j=-km3}G+hw=4kz zXZiCtrth1uxXnxP?t%{uZ#GBp`UM8I#I1CXS*krGiObW^GQDSa-YjucBeB&bD&wq% z21CE>v4F-*r*ej-+mB!6$3!^shBg|y42oT)NqiSa5*R+UAoxOz`GVlfj-+Zw*ZjZ` z3!05rrEk5r2*7*Wg>^dg6Y(frNQJ7{-U39<^F!owwx!>!=1PCi~Z8kyjLdm{i_b6@JBxjQ+q@V;s?Die4Up*M1&e4w4M^ zPTZ;z6|Q_`CM0D8_B(jXD-J7Tu?O}A3)paZfULkVu8Cn{9I;l+cyZ)0mqV_R!eJ49PkM_VI`T>BPy4d@l0@oO05T1!`FTUTdCXJ=PuR}V&fr@MEs zr+>J2;6eZ3C|XXJD(W(m0;!p+@oN(Rn0fdZNE9H#^CJ>GNZGVpKIW^hBK&%obsd*3 zxMqlbOi?lY&`J5dw1=afQv?%R5$jysL#9scF;ZxIW*~QGy>EtV(%A0)^k7;iwmmO!@f@bd?t!s8& zOYFi!NHhELpOuw#XPC|~oJUcmY(V~H#-K_$IDy{8jS5vPsZ_V&W3wAXI_y3js|kD{~z~EmV?~SC*4kkyB8GkXJ-PK}k(fNe!|R zB^au!s%faJYigiE6)3D!4+OCE^o>9R3m`U#N(CZOVU8x_ts6IQ-@J9_riJA#3#(hV z?^@ixXK@FK+m`q4SXx_JSzFz;0eI;>Yg<5=LVDVvLKUlJD%^mDkK8o85MeA(#q##| z@_}YH&^&h{|*Q>`rGv&I*ru|MHE8ynpi#y+ikE?sAx`hx*V|&_*4@&R>g4e2%8u76~&=!quJ)|*l5`##Y~&TYd-y{Y4a-`8AL<| z1Y|%xW@+psI!Z`#jEEE+J1K}sDTzVvj_f!o#Yr+sYI3Sm6R{ zIC-wX*vT&ldUTS)V%NkZWF$b0PD&m|P3h}OGU&8Ib=s=xAXulTsco>#2{r*vFvad0KRSQ0q~7rzJCS#M!?^I{6@jw2>u(u-+=sf2jn+Kyv`Ri>w^3Og8c(S z0)oPVg2RJDB7&C^G&(FICL$^>DkdR1HZdkXIW8eJAsI7g=4Rz$h=xcC7Az4BD`DnD z$4+GOM264Os^Ze>;zZmlOW3ud5_X_s*VfR~ z*4WhA)Z7YiZ~%n20vx=prK7#IqrI&IzRs?W&TgbscJ+01!yr2Ft3XE6<>?eNpd!=h zH|Nst%%2E4d~6D)&BssX=3l%55xeh9n_TJg7x(N%lqy%h>oQ|62Qu9T<+_&&tr4JHr(|jv-x13l$QI0VI<_-GA~WZH0!*8c*DD-0 z)ER+t-Ij(3=?1am1Gx%A<-r4@Cx!||o{YhdlMhNbKCL@EC`>(C!Lp0;XlNJ2*G!7R@{T2c_vd#fPN|c^{G_FC5ucrJ0#_2}`(b z;5aomy}88#@saJ)qc`7gsGD{&Hz^y<=Ovh~E41H|iIDLb%a zcg=@T7cg{=yRz=U8rRy)W>}}(*hzEDd>y|1q4RLl>$k8ztBY@qiQnTDo>r;8FV?$It6oO65hrUR?V!H6n|ZGJ zo68A<8ERA)Gi^j3vCy=uB;B3Ef2qM9|N8Y*t_b7A$bGBk8I2+_`GZ<9G8sMbr4a|c zo~r6s^FGKt=)JW)pP}88his4dpoUi=cBRN$t(=6Uj?EuLizk z39L#uT)eTvfG$wVU}BCF=f(knTC7>K>KM!2UP=yoitryD@LY%_i_|{Gk=qsOb+!MQ z4!Q6XHM8rB?0Kx=!O3oecH$1ktfb|iT6+(Bf4c1BokP;-@?>cLsp<8%0`fiaNQunp zlTXoxdATq~J(?*>&mZ;@jrjO1>rprvA#IBG7G?TP@-FYS!nPWkKU}{h)$IAYcQTA@ 
zUM7n9Hy8Cb7MQOq?9uM;5>L=$-@Xp}oUL&8{7trnwOnIW;a(?Wp9qi3@LGGzo4S#& z@gb9Y^O8DL<<0m9oW+m(@+HWL%>8G%8@jDZv8EI)Rn@}Q>#Qrw|IADD~h7?>`goL&$?M5Nc)IXFQC5oAKYDH4hj zoPINl67m>iw^Xg#K-iw$(*%h2WHvodX1?e2`wt$t%pw zzn_l~oDe=eN^mN#L8@MLV?78vfw;w=$9H3-Wx9HL`+EBZK~!d$%Dc?t{esO40yDto z1$7w=p%+n?nVJTfazx|_vw-w)(JyvPE;rQW7E|-|R)Vc^Hd_B!btE ze6eX1*L~jsS(Zb4KgM}$u%~V*JFyMZd!oqhHEaLvJ^w;rDMqMQS4S5Xpa{(nS%4au z7#W*^mXnFuEmLy~^P6{W+_JoR%j%}ZU5Hz^?_t8?jx{E3TUy_?w7Fx2gymgZt9!PX z?F(Efz$Cy4fpkc@wG%P$4zHfO0e6EN9wn3B+5dl&lR^;dA!|G+} z6j1>3YUuW5MPUt5cPgy}uYfY-K2cQ-@=kTN4JhAmMFw$MI{0^`NLMZstzDh%aGdCZ z>qJ*~S9cHa>$_2N095P=lJWtDd`n-DxESfAli3LNi>s zOjuf)ZlI`j8#!zEfQIo&7zHwZDhyyF_%-(}yesvFz`tJOR^pwZ`FBx-!7@wFiHW-)8z^)PTQ9u2@OI z@D-J(aD?adBWl3usJO*fYQQVHG9U+x$jX$Imcv(xFny~rnb-K4>HF8J0e@C`>PLn^ zB>8~l^j9){O~2cel3<-~49dUmgs42}^_*}GBU?vv)~TL_W~>sQf$Od!mle@;>$%zV zSk7SV%jN4XX2tQ|BN9HtL0=P;7r0|Fe4ea%;-tu)P0eM*DWqZnzhv4pnDrEOY| zQ5;33>-YZylX0ahT@nO_uJqDxcctID)PG;P9%%PJC|$3=qBQdZ()FM;^INq0+ea@*5Zo>cU>7{*??NEwOjp~E=YJP>lgB$#x|n>=LLTPm zTD$$VZei-m_xwdh#ZslU1kUZpC?782OwX-3y7tfDOuxGP{eXi6_(zZslaP{-l9Q5A zkdaf7Q5+|yJV8!*l7fmFg7P>u)d}k3Cr+I>dFmuJ&8btgG^mHfzYY>#p5|98qf>VU!_M;PQ%!&zkwmV%;E#0dj~9 zfTBUp^vI>Ysk6PQ3lkW5r_PpcxW#w1^>(+TYtXL#zV3nk-ob(Xp`pPC!^0yZBcr3E zV~En>_yh?3f>+hl)HKNb&VbRDwl z-VlI#R?U+jIMXdgGX{ufmBC*A`mA_icivF&ta>GfT>j*Xkjr0`REYzAA}v*4rs|Ct z>4v>AeL1@0VmB#sQ--V!pFJG$R zjnxt!zT6x%IzQFAli5{3dgSH9wp0b3lv_9EpAI{2ySwwq&2#UDr&^|WV8+%n7-WDj z#$o11+%Eo(Kl9l>13{zzkb}l&D~;o*l?Ljgfk72e)6&zNW}rLEbe7>fM{O#|<)-`I_UpUMx)z78;_}78yFZH$f zS@Dd29hea0>38jy;NSj79hgw)C)S#O>&IWX_WaK|l>J*j{^I`fSM>uu3+#gYAo@9n zvj2}A{15}b6~WyfQ=b1*4t}tY1c_^8^Wo|aKEX@e1jPRcaRv7C5B3W{Sbc!7gaB?| zU~oiWNCd+0LrH!pBn89sTf(CtNE8H&f&_5>LL*{;^A{PD5EGY#pfHotlTtH*r+|+JX|ABXLNS*J?6RY)r=zR4qr0!OdjLcJ>+S>cABO+;1qJXc zwB=WXzvWvULg^brD1EcwVD=Cwedx6g9E8C(c(rFRGJic;;rVTfJStU-6JBnEbBNiA z>xyil@j6m5C(~2o)eiWJY9DKEmt_-AbY?#j4>WwYQq?NG6L#|Ii497dc z_(?{e_C8ks!v0;n{U>5sX!^BxOwPI!;L1N)v)#*IG>lJcz2y5nGdxOVg0#65#QW#3 
zi_ol(kK?O+seCo{VxN3go=F(D=sw>aMIUy?%aq)Dr+gnI3CAZ>dzv5hQ+7xzrioqTeopk< zy4sag|VYl&qAryp)UrSb)pOBB<-@ zau6!Z!0VsDUN6C~5$|wKFb+q(!!aOi#5i0-3&CM)>l*6l8G(19zM+Ysk*To>V&@6| zXe;__-}0XLV~fyL6Bf9IqjuqukL5wEDKsjRB4s;R4~MdvD%DuGT_&8@91ZEdaXZO9AI+0owB z)d`~7Fjc`wh0aui{R4<8IM};@ztWPT_Sctq#O(!TCn0Vxh~dj90I4UDtKj%F;L;|h zX5cmm&@}Ws_yhr`LDqxzHfsLzTdv`l4Gij07`DRnWmR2=kGO^(W3xlB*Keth)wu4c zvfrb&E3$$%Ey|wi_2#HDT5}hI#}_>nFM4h}VDHtP=*J~>V)N_Wu`1#G4)nMWcd6Nj z9ad7Y+JSdfWVFqm` z%}f2LEV^oc`EE&Jyq?> zD!vZu>2%}i?uwKh+jhR-rJ8^6fcjpZzUQ(zSewK*NX=<$Cd%zxI6?ODx^{u(bC;x# zuIWRmb|$~jxtt$WdSvXF&U|EsgGjBQ)3*G5W$~wXEd!ogpSa8w=Dm8-=Xv-->-6a3 z&h4*5Rr;3KapP-1g+iGu4z~JFD zci^puC;2)_918Be6-3_7xC~68Q1QtFibTd`K@lH0)}cI-d@ zh%LQfuy^|Wf_-;Q^k*KiKWtxxP`u>i0a1jIE7der5tBGw4J~~QZ39gmL-4E8(lti> zs!+=+aIHcE5hqwOu3BYZwMwV}<7Q>9YhbQxXs!ogWUg;?11KeiCO3^tZz3Rw8wd#E z)@`s}1=m$;8yj1+pZUUw7}|W4-~|h0a99Ob$ADFK0ii*`VavRc$WVmXg-}Nz!XrVe zAQm_zkr2`GD3b(wn8eulB#ePTd?Ml@kd&O7oC?>~%=C=xjI5ljoV*;=KMtTpARr0X zoWlF1g+--s)45+$W8B=2i*)mKd?3fL9MMMAABXM7~c|&afitx>{B6=7@;Lo#_xG zChC*i2{#6I@EvUM9IufH93zhP)w&+Z`wI_=s)X)X@#)a2^LhEylNB28c5eg#a7OT5 zQgJc<%x>D!v`4Q`Z>J$DA1*$!agjdARL^ELq#_OSe)G@e{r|_~%O5Zx|6Sv(9}!Q- zG%s*H`l&1Ks^$eXSYF{u17m0X9e%l27cTr8hwf+K|<*$6lJ5fu5fsqHpa8#NYADf<aX7c%NYtX3O$?iI;%lVh{qx$_c7?c9S(%YBE~lKk3^ zak?0^hI`gxOP{n4Yc2Dz+k#JKU(niQY45UyPa0osRZf8` z4aPe%GO|!e<-mGQNd=)qt7@pIX{l;xt7+=0Yw3aQDs=b1QBi-moMLLJA=FZHeYBPu zn_8Hd-!{ErX@1kn{Fc=XKog=+LI4p0d=Lc=BAiVSHh}a3yx=O|@}IuS#)1(fVgZ?) zoD4~no|c}Ok(r&Dm7A3VraOSky$=AvyuuO;LmDuG2t)I;8ze>>2=Rdg!ZgSj08$XK zf~>5@n6iS0IeL$+ZN}KswSajtdXGhqvB*8Pqq(aCBUb>EK?gD%pjQ{TyCBg6&_Im! 
z&LFt&K*P#GF5i4Gtj>-;6GDm8F!YGqoK`v9mWO&WPJ3@5{uAWBd_Y z?@*x@(Gj-6s;4eIuDjGSyd*r1@&|4b=fPr3pDqtQYoaZsN=H^0D;U08>K$1hukc1S zwhcye*ks9?6;9Uu) zdNM7^K%nGFnY#S*w2$ZOqd{DA^I?}Q-b!6@g)FC1inB^s7Zx^P%ij64oonRM#>1v< zw5zN0%m2-1CZ605fOQ$(fb$Jo~u|7nxZ(FR*d3a`3QSs9#3ZD}C8Z^< z%1FXCLRwZzM((;CID088ses~w7FarKY9rRNusP7yGtxm^osD%761OS1$f8EF<^~`W zw)9-wGBL9-yK(!*%@y5+yNKulqPqYuhV>mQkY0dy!v+AycUNJK?d&nuvLJ}!`jgA| zF!+|Ma}-K&1Ktf7tf!}CAeLS5dgSB+q`wHYkVRLCE89e5p@_U2zm6qqSwwE^a{%=l z5o0I^uP^YJ1(g)I;KB(P!6Mh!101=wsi6*@m*%FrmS)5N6YSaBTLB%3QA|O6ybzij z0`UMR=WZ}_Mivi!{Q#~)7Z1Y&$nIfe5S4YuD7!E5-6p0mO9h1P_6PWG3!l64Unry` za00)4Mh0u2hgQ1~5L;~H78mgZEv;WDR-S-&;YXy$3 zUt5<&GuN1_qr3AetFeH3S#VN7!Fd(pWY(j+vkR0B-4F80kckuBo0aa?nXqw#HB}OV) zv?>|zT`muOWP2S)K(vVxM5rEIU<lZb-?WFEFRoPzdw>Q_7^;qr|x_4Cb zrkvYlf<-nmWeIT0;xo9FSkq)7H_~)-}-4!|W`PwWXc`gb6G!Q5$(s zU@|v00nWosQ}bJ9$i#p921fMdw$<%BcVYCmv;t)bYghmvD*zi?TVxCH`|C^}SO@s} z`S}O<2LuKL1_cHM!!iKHR-?v&k)R+E8yO8E60pKV=YDVvOo9tWT4HiK1ZXdzfyghR zk%fpbp^=J6F=eM^WMkA{va&IzN(K4oP7}~%rKK<(fT>b>EldY>RV|G*n9%^7VxJs5;^R z)!twag1XwM1H5tdp0f^PjR99QMegmt^?Je&{HqC)Xhp5og} zBL;eQ;DS6o^`FVp|JvE%@5oc7-{it;YisKud?eKN%)rpt$k^1x6hzT1Zd%-dOtb=} zB}Ccxue38|nx8lRM&kd%>-oSB%Cm6Qq!Y9ODMmyT$s6=vsw zY8nWVE}bQTqy+x+U_M_`UR_#IQwBbuRfrELn1CW8FmY9Akvv^}>@DjVTE}Z#%byZVyn23x)d~N?*lzCm z&>EHUd*?s&PRr)0kD3-u#I-7B6Rzj9$vPmgr;Kwnh3t#g2Q7o&?^yHN-=gV)~by&NwS>wecZD;K|^j(R~G=R?Vp3B7$gXry-;3z~mai;~sdddAdUbBDT@ z>^)q5u5gak6Nf3hDq8=(DnG zETC_qqi>1^dJtHpRtzdqx}OISa~MF3Ol}yPBAvkUvttF-A7dem5nqHU1N0Z|9i1H< zT^yYtTw!T}kzjOl_i*Far&ANR$L^!Z_thf|8Wb?{0{R0)YJk#El zScalkt*K-~_HM6hCSh^z1;!DNq|)X}%Oi+)cFpV`?Q2^fPk&e1e2$HRY5ihHgUN`y zyLOe+9$LZFOiOun0kITEQ~f|k?_U;8y8W-+6_T!?s{N4s#5$;TVfQ(j!1~flH#XZm zJFjbu|IlH){tH+<|$;P`WJEU_0zlG&whO0IH~*vjGTV};vOAbol#7uaNTlrQZiD~ z*QI6TKuK0cRsoimvT`e8vMTalipeS}gPyE9tTvJRiK>RG8rphj>S$@}!m>dJwQOGU zYc?_gL0M3gH8V3qKvFku+`M`77C@!Gq;aptOZ|0-$taA1*RK(iV3ZI6m;+?%kP@E= z`-Y4q&}3F#W;Vz=-^U0#1Gb>3xU9IOytE91j;aJ+8YT(AsItDUW?9PwmXw&iLPJ+O 
z65Z_>d4%Tf4uq42UP9mwf@mXjb@z4k0OkOJ9iTDT)jJ4k&fUF3-F-tneTY{=FLDz3 zEbF}7WqiBE0H7pde*g%oW#j?oJTf``aC-LfXT;>IZy_dEJ0oo2i$Pk9#r|yyqpq>x7_T*K>GN>KzC3}(UyyaIhWhkA6+bzR3?yZSeNay_Mu3a=*Y42 zF3q=2qzw7h+G`|Jrad0B%-ir#S^bHRSZUG?x~&u3embm;^hHcf#epy1ii6Vlt3lJS zrsPf2wZ{ex_tmQBGb|dU{OLZ1CKq6QtyoUSd-jRRyjTN(~L?o$Y zu3wjxlap6m9#7Qa%nI;Hkg-K>tU898ItXM*3p1X4iCX$TWC`U`Bhy=O zU^TmGdDFrQIg~7^vDm@3#2U6Gu*h-&4+`ZgAygfX9K%IJN zW78DTJ^N^u!R6c=O4Qd@??vC9!ec0D+WaS-MI<2BBkuPwU=z9Tckg4&BY4E;<9SH0v zVF7|FQvaC+h*WJW!D-9wgn+wzeiU(7S;&k7hn0JH7aU&h*N>`mB)*)W>8Bsf>xQ#$ z_~@ntcRKwZE_^H_PrS@aSgvk_oY5o1`lVau@!n?=P{%BCNxzs?QsA?u4W+rkBS6Y3 z!fvUz;g$Dbv{hs6T72xivl3QOd1u(ng#)^5XzO+zql(3J8YfAuRe3XWQAMAuSi^+k z&Y_|YMx80^JD%d#dcP5Tu;_9mNqX*#=1?39cPO);?#ONDCv2et_-E%W8#fy=9@!s2 z*JrI(g!f84BwXKC{1=zYA%Q2G+pv3LlNVA?F5G>FyC>up_xVjdf<1Os+itAie7Z|z zgS%Cjm5kZZy~3r`^s=|PMwJ68g|}CYu%v8zb>uq1(o;9OsCJDJIlaK&vb$D94iwuh~slGvs`3h zyTr!9$#IDbq%gR+c`+Jo0s;bpf`Wht7ZMWsU4|{@IZ{wiQbhFFz@S0{_9bc>D2x() zkPr;zm#@+1eF>l^zgn3rZA}n}hk^0Z)@1el1Vr;K5YQypSKPY?M-!07|AB*vH-eyq zn~AR<;#RS8GYJU}2@OHkC*h%CutA9cRho#1C`6YgIwm>>03R3x)fW(|w6rut7AhMQ z+4A%93lX>^U?q!65qu;VRG=DA2-Lf(1`e>*)qspd%qoCLi%@Bo$h3fzY-mEgD_WYt zz5?;C0Mj(M!-u^*E0UuePfauf63Iu!!1^xhom|Xe! 
zxt~FMm?dpCP}E)lP{NvI3K1xM46hPkC=sN`{EMH!dboXw^`ODJ*u1ye75nswiJ4td zb(Y6)JTh*yF>G z+0m-DMdX;iU&q*rd*xb=ofXAQwt_?~ACo*WFUM_-Y8}uMZSrf2XThW93s@_mD@MT#3o+_~HwPch`llZ?LDak?43aep|&pfux7w zm3p`R<(+$;|R+FEGBaW{OvKB$l!# z-FjK`fMShh!jK9+>r9_=$Gz>`Tt}^ErB#J&4q8;o+3q{_3r6E(AM(OP{oCmX|F<}p zn1SiGg$44Y+`eN4uZpGBUF2bbcLl+W0oubIx)@I{KX2avA4IbsVU+m?g$Dqa455=@ z&Ny(%SvuxmPCBc9{+8@9IzSG9ddk=P zLb+0Evi|%drP>j0_7<;hb;;JwjHseDqQ_MnMuPkni%IaAj7J)*Gt6HVY%w0GQJ8dj zga#vp3adF=p=g7OZN*(deKS4aw_E#-Py6r-I(Io9Gd7D7Kgn#ip@%ku&qu^*(aoJ#gh!2dOpt}*rGgtrv$2Fb zm$C1JM=gGxqT3hi&d2aL{nP1h|JggV7IKG1^b-(WOjHL9@dh%4gY?g@a}c!1YE{0U zMv$*eVN2~#L5+mn}@hta|Wk_a$FfF6Q*gwPu^f^9|5 z%oVleaA}6&3N^jNcwM4Kmx$eEZ9_|aBSL^hj7F9Tu*luH4c;f@Lz%B2EP{BAz-p(b zw;$<%Q2HC9dHJi;(efe((*b>Me=rZ#(v@g=ivy1nv_J4ZP0c)lCu;W56SN7Mdp0*e zkKkS5m3j+k*Y7}G;bQa+`kPHyW0yX5+}~;zf2K>;VV%C&`wyIBq>k-Q;q$kmp|63?s75A& zX1}-EaT;wHq$S@;v%C5RccTRO7YndWl-iUaG(s;sOR5n>_TmpQOqd z=%1%hJm{rVD~=sMbDo@@Ix?R5%?V|m0;)z!q3ieuY8}+3Y=Sns;%SP!Dv30ZvKV@J zL$>ew2CbcjYNiCZlLkAdUTeO8&imjf<$7gr>L__DC(jav!*M|bhE2RpM#IT3QW$D( zr`r}W2fZ~Jx7_U7qce9-`&GfG7-2aqGx0Eiz9wTg%fum$VGBOd$)MZY_-lsw`N+90 z$!}ux+iS9QFPwv>p6olsn(e$#`t}+j6JY`GS=@FJ8$HpzcNVF~#BbKT#XVrZb2rCE z;eKhOYu3kQ=v#Ls%qA3uKL#EFw9Pf}A;pE`AlhK7cg7F8TPefobyb8s1YhDu?tlzhOT z86uk4CYTcr=7NKqZxFF0bPTx*m{8k$Hg?vwi09bDS<6-mMp%;U%|wK zIt-8&0gV`mP+(+T_V~f+d1wp}{3?K{2_UfwGUI{+DRqzoUGJVJ_h5q|Kxmb7y$Tz` zT)=693b7lSLEMZSMSlQZ1;A&>Ivu(`*a-~-Lb!V;_!{XXc6X9`I!VONUSbzeKY*e0 zq_^iuA8}m+0ndG)fIu1ap}}EFc;r|5{V!hv{_qB{-cP?SfQ&+nN#*`MNd+ItM9hW1^jwS8wsQTy!>|a}aD>-vFJ1xFXXqX-E2P6(?;l#r92T9{p;tHs(s&vCY?uhdm_Lz9>^4 zbPm=9E=}jvy7P#HTu7Sdf#b`@nj%^Ig_}_J!?)3b`dM&7#Z)^-R7v`T&b}92*+5l6 zsCbT;hUgdDcGTi#`niTh>VD#^o*Tyd&1A(C%@mE^yTrh2aL1jw!w)TK*LjEFhw+oY z5oB<;_WexfBiH$%V#Vb*&zshwx8JSkm$)%ODz@wTS-EQY?L_-EyZ47auYOpXx#kw! 
z@a4;*<@aaUF-leXAc*nODpSokg%-L!D7Gq1)X3`5j=Rqkes zGK ztx~SqKKguNto4pwQ(v0CsFyjBK?S0)3)O@ZSZYRhOiq-Ud*LOw9labm!E3BLGeRpY zU*+W{>y0Iv+NL^ZKcwL*EU?{TQ`1 zY{l!43XlmdEf`cNfD;DwY@8X2)E>#s%++i?!!VDZoEAMvc$MG&lrUXAGkVI zmKOatcR!4?Qv@J#CSPom`zF-?RjW%{7F0H>DkuS2Sxxnj#vwJWBkDTpnnyKt;M%$f z9X-RNa3g&L1OXwCrVs>VvGfnB6~?yaM(I=*^(IvEy+ESYib| zL!@qEZx87SsTa`sz$_mo8|1(mAB7f=ff%#_41!*B3aJl$SN%Z%f)e=alOK~$$DoQ4 z5c-AEcjStZsdZ%;n!X~(eSdfr`F8?P89yw939`C+_?pqXcgpq< z7+}0%g!gv+mRp^#bXX%rAIqm7(biTXWY33|HH$At$5%{^!f2;GB|a%MO>7}W8ZTdw z`(>nyE{kiPah3OUKfI;HATl%inY;~qFq8C zQ}bJRaaK9RZ0XQ-L<*;w?+G;}Og%T&Gz%M)Yho{)gqaCZk))39vz&Cb=M>aTL><=3 zlWjOS&Y5g^nUiv3k4bk~$VW|04F-d$tAmWs-wFAcnwtJ)t)@KyqRE>@ zfL3&NbOHt$d9%pH72H&=Zs4wR_wexW^!#pZ;D4NI_{~-WdB~l@5B5|E^a6A{T)k1gx-i^ttqQ>Fo+I4!!Qo*MM*_;GO|(@T(`k5i$PN zkpGrn{KlOBN-l0#^iZI99Z+b20EPC}whl7uPG_{5D%Kiyl;$oq(+1_+47u~c3XM*(@y=u_1D<;N_PqV#5wd{*@+FKo7Ka44cp+sQ+i*6|o@Uf3OoRqKT6oXB`@Q{0CS(0n+H5YY zT)&-oa*vXN{}URPJ(3(Fb+^hU@-vIG2y!B;%e>E;Zi@x4HLoo59%lGv+Y=9NjDKUZ zCO<+RKvxFTqzLeXAKjDq`|mj6;TzQ9|42vvS9JN|cbq>IjjoF>UYo9MR{viYU4Y;M zq!vjavh0%qYYsB93bOJCJSdc6J5`J6uQd**_Fz|R5aKvzik&moZ^;nCrdaS>>2 zWORH~OaeMKG3H8AEG9V?b2Sc|awR?$laPi@1h8KQI1t@;H8UwC>uOpK#PVh4ex>** zmM_K2-2nL@iVu)`c@VP)fV~Pbs)s8nt1d0CDXSz@;OfBEcva0^9KODq&`?u*4-XI> za)~JAfl28R{T9Lj$PHL_Cs&EKHMO*Vcg-CbE94z_;^R)}fGAsMcP~h@b%Qh;2?W{( zNFdPG_w9E3Nbi&Na9iI1CEWJ?z4kv*Tz>Oh{jM|o^`9GwTCxm7VRb-|7RtpfE-byH zREqriM3tbBIiOM`L)R#0k7ba=#>6{WbP+nLJclZ>7T<4G=SoXxUBDtm5D8yJmmP#w zu3E{d?wlRYxP;Tl!i+G35VGjfSFC1KJ^Z1{t*+Fz(SahmnBwAN(3rhLHQp0#neE8E zutfL*JzN=CsHY!bPR~(bDq|O#jLsW*ACdjc=8F4=NGpGCGI(QZ0>qQXy;tSfZuV1D96RiG%3U|8kd5x%S=3hdaBmvPostB6==sU zY~~2FyD%4~J|3MM^OEQC?kBE-5wwcO(j@>pt$km%g(q9Dw|-HlnVM=7v7-XPh{#hW%XwO{}CXSpf22aGLrFa(~h!_A@Z zQNC2m#qg=Dy2o_fNtSH_uTmpL=*`o_>C$%iEt-Ddgjr{d6ZJWb+}-qgob(3rV|Tb% zq@$DFrwa@6i>8ZiRZmYBua!NJn7LK;#C@g&|DtH7^v?41%Pd!5nT2JYOMqe4PRd?q<2< zVH0VcUbEcZeWg5Y-Mm*9Q{%B)7tbX&@Qiw75sv0lONyXx0hU!gf(Pp&Pz3sh-4+l#*>M zJJel0yuk59|`W&U5eZb$xGraQ&^UP0zEzeL|I6)V$s#V5GqPS`PetbPG 
z&gSj2A|Zwa+{1ch^|Ctcp!sSIHmi&gmB>0NoazU$Z3-; zU^`D_P>Yw$>KLv)waPe+-oIzA024yP>A+I61f%0=ju77+xudeF`3AxhT~;W-4r5D; zKQ1L8qmf=O@<1aLpDXFSyOmQ~ch}XxQW4q35QnYH(n)NBhpi(gb{iK&h$!aTx|tYu z!r9QF{-T&K7B|NITNBUcXkL;&m>$4%otGGuXPY!>Nb=SeObqnFej<$W2|8imYlc#F z;(GF}m$MNZLNO(~1!EJbc=Bc=l6wZP_{}$$sJm~$GA%TQ4wc^-&14L)qz{)=ew}31 ziQ(rR;@!8o>YDJJH;SchV0@%HUf&=lUBWVdS3|REdq@JjAw4fww32yv`F&Lg;d*$Q%G_&=t3gCg^!YXgyM&G!uSzW$ zfu64EJhq+A{@DgXqJ@w9dSV{5qNm@9#k5|o3wYow$Kb@mQ=(*Cy7ysBFDzGJ38#OA zt2u|d5w)NDdP?agoP2ZDE}CF-7{zz^ zL(jXD0t3Re31{`abZtHcXzW^%y2v!*VJldKFG86Fd~v1Zv#8JICkBCScFS22hq{NV5LT8i zHfrmUNw;mm6J!y6=NuRZE@nDv*#uOu-OyajI= zuzEJ=rp3ZcDVea{HzM;qR)e+f!M?p~J!TtwVj-o|Y%Qp>kQP zdgOnlM0VvS{qV5kAbs-5xXO%YD_zRNA3F`hB(r|Gb3$q9W7kEdcNU5KuhgGOgfQ|~ z%J_bw#Gv!uhSHkZIoZ_avH0 zH{)=Z->JVgZ(n)B9hAG>J(JmDVx@oTHDdqtd1gE6)q#dFs+#LYBwNAN!O4XXfqkY; zni{J^jbp|}e3ovW=Tzl1V3m@wpXYqjX~e|^YY6>^*bh0miv{2m>t0@9ADL5cEj^=n z_m|OmV{Z@fmwO{?KO){{yyfDjmd4-L@qOzc{b@|q4G-JFw18d>8|UZ>G>|D>h!Ygz z+rn;oliN`+s20la<%6zmI{QPH{7jhZE5IgZzgs#${Y_xO2<#YrJ^!3RqcwKK baVKZK05^YEK4V8;XXle9r*zz0HmUv>n)m4_ literal 0 HcmV?d00001