website

Website contents
git clone git://git.reagancfischer.dev/website.git

commit c3abcec683384c5aaa5140189f4226cafed68239
parent 7d06a346d64a47ff7645340ba6a41a34ee65b7db
Author: Reagan <rfische2@uccs.edu>
Date:   Wed, 21 Aug 2024 15:51:10 -0600

Blog

Diffstat:
M about.html | 2 +-
M blog.html | 2 +-
M blog/libc_22oct23.html | 2 +-
M cat/index.html | 113 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M contact.html | 2 +-
A images/netscapenow.gif | 0
M index.html | 2 +-
M projects.html | 2 +-
A projects/cminus/code/hash_table.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A projects/cminus/code/hash_table.h | 14 ++++++++++++++
M projects/cminus/code/input.c | 23 ++++++++++++++++++-----
A projects/cminus/code/tokenizer.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A projects/cminus/code/tokenizer.h | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M projects/cminus/lexer.html | 501 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
D test | 1 -
15 files changed, 998 insertions(+), 83 deletions(-)

diff --git a/about.html b/about.html @@ -45,7 +45,7 @@ <footer> <hr> - <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2023. Made with <a + <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2024. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer.</p> <hr> </footer> diff --git a/blog.html b/blog.html @@ -47,7 +47,7 @@ <footer> <hr> - <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2023. Made with <a + <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2024. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer.</p> <hr> </footer> diff --git a/blog/libc_22oct23.html b/blog/libc_22oct23.html @@ -32,7 +32,7 @@ <footer> <hr> - <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2023. Made with <a + <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2024. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer.</p> <hr> </footer> diff --git a/cat/index.html b/cat/index.html @@ -8,7 +8,8 @@ <link rel="stylesheet" href="/inter.css"> <link rel="stylesheet" href="/new.css"> <link rel="icon" type="image/x-icon" href="/images/win95.png"> - <link rel="apple-touch-icon" href="/images/win95.png"> <style> + <link rel="apple-touch-icon" href="/images/win95.png"> + <style> .blue-box { background-color: #007BFF; padding: 20px; @@ -50,48 +51,49 @@ height: 100%; object-fit: cover; } - /* Dark Mode Styles */ - @media (prefers-color-scheme: dark) { - body { - background-color: #121212; - color: #FFFFFF; - } - - header { - background-color: #1f1f1f; - color: #FFFFFF; - } - - nav a { - color: #BBDEFB; - } - nav a:hover { - color: #90CAF9; - } - - .blue-box { - background-color: #0057C0; - box-shadow: 0 5px 10px rgba(255, 255, 255, 0.1); - } + /* Dark Mode Styles */ + @media (prefers-color-scheme: dark) { + body { + background-color: #121212; + color: #FFFFFF; + } - .cat-image-frame { - border-color: #333; - } + header { + background-color: #1f1f1f; + color: #FFFFFF; + } - footer { - background-color: #1f1f1f; - color: #FFFFFF; - } + nav a { + color: #BBDEFB; + } - footer a { - color: #BBDEFB; - } + nav a:hover { + color: #90CAF9; + } + + .blue-box { + background-color: #0057C0; + box-shadow: 0 5px 10px rgba(255, 255, 255, 0.1); + } + + .cat-image-frame { + border-color: #333; + } - footer a:hover { - color: #90CAF9; + footer { + background-color: #1f1f1f; + color: #FFFFFF; + } + + footer a { + color: #BBDEFB; + } + + footer a:hover { + color: #90CAF9; + } } - } </style> </head> @@ -103,8 +105,8 @@ <a href="../projects.html">Purrjects</a> / <a href="../blog.html">Catablog</a> / <a href="../resume.pdf">Resumeow</a> / - <a href="../contact.html">Contact Meow</a> - </nav> + <a href="../contact.html">Contact Meow</a> + </nav> </header> <main> @@ -117,7 +119,9 @@ </main> <footer> - <p>&copy; Copyright 2023. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer. Cat images sourced from <a href="https://www.kaggle.com/datasets/crawford/cat-dataset">this Kaggle Dataset</a></p> + <p>&copy; Copyright 2024. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer. 
Cat + images sourced from <a href="https://www.kaggle.com/datasets/crawford/cat-dataset">this Kaggle Dataset</a> + </p> </footer> <script> @@ -138,19 +142,19 @@ } displayRandomCat(); </script> - <script defer> - var _paq = window._paq = window._paq || []; - /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ - _paq.push(['trackPageView']); - _paq.push(['enableLinkTracking']); - (function() { - var u="//matomo.thespringstechguy.com/"; - _paq.push(['setTrackerUrl', u+'matomo.php']); - _paq.push(['setSiteId', '1']); - var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; - g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); - })(); - </script> + <script defer> + var _paq = window._paq = window._paq || []; + /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ + _paq.push(['trackPageView']); + _paq.push(['enableLinkTracking']); + (function () { + var u = "//matomo.thespringstechguy.com/"; + _paq.push(['setTrackerUrl', u + 'matomo.php']); + _paq.push(['setSiteId', '1']); + var d = document, g = d.createElement('script'), s = d.getElementsByTagName('script')[0]; + g.async = true; g.src = u + 'matomo.js'; s.parentNode.insertBefore(g, s); + })(); + </script> </body> -</html> +</html>+ \ No newline at end of file diff --git a/contact.html b/contact.html @@ -34,7 +34,7 @@ <footer> <hr> - <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2023. Made with <a + <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2024. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer.</p> <hr> </footer> diff --git a/images/netscapenow.gif b/images/netscapenow.gif Binary files differ. diff --git a/index.html b/index.html @@ -87,7 +87,7 @@ </main> <footer> <hr> - <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2023. Made with <a + <p style="text-align: center; font-size: 0.75em;">&copy; Copyright 2024. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer.</p> <hr> </footer> diff --git a/projects.html b/projects.html @@ -156,7 +156,7 @@ <footer> <hr /> <p style="text-align: center; font-size: 0.75em"> - &copy; Copyright 2023. Made with + &copy; Copyright 2024. Made with <a href="https://newcss.net/">love and NewCSS</a> by Reagan Fischer. 
</p> <hr /> diff --git a/projects/cminus/code/hash_table.c b/projects/cminus/code/hash_table.c @@ -0,0 +1,123 @@ +#include "hash_table.h" +#include <stdlib.h> +struct hash_table { + hash_table_entry_t **entries; + int size; + hash_table_cmp_fn cmp; + hash_table_hash_fn hash; +}; +struct hash_table_entry { + void *key; + void *value; + hash_table_entry_t *next; +}; + +hash_table_t *hash_table_create(int size, hash_table_cmp_fn cmp, + hash_table_hash_fn hash) { + hash_table_t *table = malloc(sizeof(hash_table_t)); + if (table == NULL) { + return NULL; + } + table->entries = calloc(size, sizeof(hash_table_entry_t *)); + if (table->entries == NULL) { + free(table); + return NULL; + } + table->size = size; + table->cmp = cmp; + table->hash = hash; + return table; +} + +void hash_table_destroy(hash_table_t *table) { + for (int i = 0; i < table->size; i++) { + hash_table_entry_t *entry = table->entries[i]; + while (entry != NULL) { + hash_table_entry_t *next = entry->next; + free(entry); + entry = next; + } + } + free(table->entries); + free(table); +} + +void *hash_table_get(hash_table_t *table, void *key) { + unsigned long hash = table->hash(key) % table->size; + hash_table_entry_t *entry = table->entries[hash]; + while (entry != NULL) { + if (table->cmp(entry->key, key) == 0) { + return entry->value; + } + entry = entry->next; + } + return NULL; +} + +int hash_table_put(hash_table_t *table, void *key, void *value, int replace) { + unsigned long hash = table->hash(key) % table->size; + hash_table_entry_t *entry = table->entries[hash]; + while (entry != NULL) { + if (table->cmp(entry->key, key) == 0) { + if (replace) { + entry->value = value; + return 0; + } else { + return 1; + } + } + entry = entry->next; + } + entry = malloc(sizeof(hash_table_entry_t)); + entry->key = key; + entry->value = value; + entry->next = table->entries[hash]; + table->entries[hash] = entry; + return 0; +} + +void hash_table_remove(hash_table_t *table, void *key) { + unsigned long hash = table->hash(key) % table->size; + hash_table_entry_t *entry = table->entries[hash]; + hash_table_entry_t *prev = NULL; + while (entry != NULL) { + if (table->cmp(entry->key, key) == 0) { + if (prev == NULL) { + table->entries[hash] = entry->next; + } else { + prev->next = entry->next; + } + free(entry); + return; + } + prev = entry; + entry = entry->next; + } +} + +#ifdef TEST_HASH_TABLE +#include <assert.h> +#include <stdio.h> +#include <string.h> +int string_cmp(void *key1, void *key2) { return strcmp(key1, key2); } +unsigned long string_hash(void *key) { + unsigned long hash = 5381; + char *str = key; + while (*str != '\0') { + hash = ((hash << 5) + hash) + *str; + str++; + } + return hash; +} + +int main() { + hash_table_t *table = hash_table_create(16, string_cmp, string_hash); + assert(hash_table_put(table, "foo", "bar", 0) == 0); + assert(hash_table_put(table, "foo", "baz", 1) == 0); + assert(strcmp(hash_table_get(table, "foo"), "baz") == 0); + hash_table_remove(table, "foo"); + assert(hash_table_get(table, "foo") == NULL); + hash_table_destroy(table); + return 0; +} +#endif+ \ No newline at end of file diff --git a/projects/cminus/code/hash_table.h b/projects/cminus/code/hash_table.h @@ -0,0 +1,13 @@ +#ifndef HASH_TABLE_H +#define HASH_TABLE_H +typedef struct hash_table hash_table_t; +typedef struct hash_table_entry hash_table_entry_t; +typedef int (*hash_table_cmp_fn)(void *key1, void *key2); +typedef unsigned long (*hash_table_hash_fn)(void *key); +hash_table_t *hash_table_create(int size, hash_table_cmp_fn cmp, + 
hash_table_hash_fn hash); +void hash_table_destroy(hash_table_t *table); +void *hash_table_get(hash_table_t *table, void *key); +int hash_table_put(hash_table_t *table, void *key, void *value, int replace); +void hash_table_remove(hash_table_t *table, void *key); +#endif+ \ No newline at end of file diff --git a/projects/cminus/code/input.c b/projects/cminus/code/input.c @@ -33,11 +33,23 @@ int input_getc(void) { return buffer[buffer_pos++]; } -void input_ungetc(int c) { - unget_buffer = c; -} +void input_ungetc(int c) { unget_buffer = c; } + +void input_destroy(void) { fclose(file); } -void input_destroy(void) { +#ifdef TEST_INPUT +#include <assert.h> +int main() { + FILE *file = fopen("input.c", "rb"); + input_init("input.c"); + int c1, c2; + while ((c1 = fgetc(file)) != EOF) { + c2 = input_getc(); + assert(c1 == c2); + } + assert(input_getc() == EOF); fclose(file); + input_destroy(); + return 0; } - +#endif+ \ No newline at end of file diff --git a/projects/cminus/code/tokenizer.c b/projects/cminus/code/tokenizer.c @@ -0,0 +1,159 @@ +#include "tokenizer.h" +#include "hash_table.h" +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +// Token data +#define TOK_MAGIC_1 0x544F4B454E544F4B // "TOKENTOK" +#define TOK_MAGIC_2 0x544F4B544F4B454E // "TOKTOKEN" +struct token { + long magic; + int line; + int column; + short kind; + char opt_data[0]; +}; + +typedef struct token token_t; + +struct token_data { + union { + long long i; + double f; + const char *s; + char c; + } data; +}; + +typedef struct token_data token_data_t; + +// Token creation +#define token_data(token) ((struct token_data *)((token)->opt_data)) +static token_t *token_data_create(c_token_types kind, int line, int column) { + token_t *token = malloc(sizeof(token_t) + sizeof(struct token_data)); + token->magic = TOK_MAGIC_1; + token->line = line; + token->column = column; + token->kind = kind; + return token; +} + +static token_t *token_create(c_token_types kind, int line, int column) { + token_t *token = malloc(sizeof(token_t)); + token->magic = TOK_MAGIC_2; + token->line = line; + token->column = column; + token->kind = kind; + return token; +} + +static token_t *token_create_int(c_token_types kind, int line, int column, + int64_t i) { + token_t *token = token_data_create(kind, line, column); + token_data(token)->data.i = i; + return token; +} + +static token_t *token_create_float(c_token_types kind, int line, int column, + double f) { + token_t *token = token_data_create(kind, line, column); + token_data(token)->data.f = f; + return token; +} + +#define PRIME 211 +static unsigned long hash_string(void *key) { + unsigned long hash = 0, g; + char *p = key; + while (*p) { + hash = (hash << 4) + *p++; + if ((g = hash & 0xf0000000) != 0) { + hash ^= g >> 24; + hash ^= g; + } + } + return hash; +} + +static int cmp_string(void *key1, void *key2) { + return strcmp((char *)key1, (char *)key2); +} + +static hash_table_t *string_table; +static token_t *token_create_string(c_token_types kind, int line, int column, + const char *s) { + if (string_table == NULL) { + string_table = hash_table_create(1024, cmp_string, hash_string); + } + token_t *token = token_data_create(kind, line, column); + char *key = strdup(s); + if (hash_table_put(string_table, key, key, 0)) { + free(key); + key = hash_table_get(string_table, key); + } + token_data(token)->data.s = key; + return token; +} + +static token_t *token_create_char(c_token_types kind, int line, int column, + char c) { + token_t *token = 
token_data_create(kind, line, column); + token_data(token)->data.c = c; + return token; +} + +// External token operations + +c_token_types token_type(token_t *token) { + assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); + return token->kind; +} + +int64_t token_int(token_t *token) { + assert(token->kind == TOK_CONST_INTEGER_U32 || + token->kind == TOK_CONST_INTEGER_U64 || + token->kind == TOK_CONST_INTEGER_S32 || + token->kind == TOK_CONST_INTEGER_S64); + assert(token->magic == TOK_MAGIC_1); + return token_data(token)->data.i; +} + +double token_float(token_t *token) { + assert(token->kind == TOK_CONST_FLOAT_32 || + token->kind == TOK_CONST_FLOAT_64); + assert(token->magic == TOK_MAGIC_1); + return token_data(token)->data.f; +} + +const char *token_string(token_t *token) { + assert(token->kind == TOK_CONST_STRING_ASCII); + assert(token->magic == TOK_MAGIC_1); + return token_data(token)->data.s; +} + +char token_char(token_t *token) { + assert(token->kind == TOK_CONST_CHAR); + assert(token->magic == TOK_MAGIC_1); + return token_data(token)->data.c; +} + +int token_line(token_t *token) { + assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); + return token->line; +} + +int token_column(token_t *token) { + assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); + return token->column; +} + +void token_destroy(token_t *token) { + assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); + if (token->magic == TOK_MAGIC_1) { + if (token->kind == TOK_CONST_STRING_ASCII) { + free((void *)token_data(token)->data.s); + } + } + free(token); +} diff --git a/projects/cminus/code/tokenizer.h b/projects/cminus/code/tokenizer.h @@ -0,0 +1,133 @@ +#ifndef TOKENIZER_H +#define TOKENIZER_H +#include <stdint.h> + +typedef struct token token_t; +typedef enum { + // Control Keywords + TOK_CTK_IF, // if + TOK_CTK_ELSE, // else + TOK_CTK_SWITCH, // switch + TOK_CTK_CASE, // case + TOK_CTK_DEFAULT, // default + TOK_CTK_WHILE, // while + TOK_CTK_DO, // do + TOK_CTK_FOR, // for + TOK_CTK_CONTINUE, // continue + TOK_CTK_RETURN, // return + TOK_CTK_GOTO, // goto + + // Type Keywords + TOK_TK_VOID, + TOK_TK_CHAR, + TOK_TK_SHORT, + TOK_TK_INT, + TOK_TK_LONG, + TOK_TK_FLOAT, + TOK_TK_DOUBLE, + TOK_TK_SIGNED, + TOK_TK_UNSIGNED, + TOK_TK_STRUCT, + TOK_TK_UNION, + TOK_TK_ENUM, + TOK_TK_TYPEDEF, + + // Storage Class/Specifier Keywords + TOK_SCSK_AUTO, + TOK_SCSK_REGISTER, + TOK_SCSK_STATIC, + TOK_SCSK_EXTERN, + TOK_SCSK_CONST, + TOK_SCSK_VOLATILE, + + // Misc Keywords + TOK_MK_SIZEOF, + + // Operators + TOK_OP_PLUS, // + + TOK_OP_MINUS, // - + TOK_OP_ASTERISK, // * + TOK_OP_SLASH, // / + TOK_OP_PERCENT, // % + TOK_OP_AMPERSAND, // & + TOK_OP_PIPE, // | + TOK_OP_CARET, // ^ + TOK_OP_TILDE, // ~ + TOK_OP_EXCLAMATION, // ! + TOK_OP_EQUALS, // = + TOK_OP_LESS_THAN, // < + TOK_OP_GREATER_THAN, // > + TOK_OP_PLUS_PLUS, // ++ + TOK_OP_MINUS_MINUS, // -- + TOK_OP_EQUALS_EQUALS, // == + TOK_OP_NOT_EQUALS, // != + TOK_OP_LESS_EQUALS, // <= + TOK_OP_GREATER_EQUALS, // >= + TOK_OP_AMP_AMP, // && + TOK_OP_PIPE_PIPE, // || + TOK_OP_ARROW, // -> + TOK_OP_DOT, // . + TOK_OP_COLON, // : + TOK_OP_QUESTION, // ? + TOK_OP_COMMA, // , + TOK_OP_SEMICOLON, // ; + TOK_OP_ELLIPSIS, // ... 
+ TOK_OP_ASSIGN_PLUS, // += + TOK_OP_ASSIGN_MINUS, // -= + TOK_OP_ASSIGN_ASTERISK, // *= + TOK_OP_ASSIGN_SLASH, // /= + TOK_OP_ASSIGN_PERCENT, // %= + TOK_OP_ASSIGN_AMPERSAND, // &= + TOK_OP_ASSIGN_PIPE, // |= + TOK_OP_ASSIGN_CARET, // ^= + TOK_OP_ASSIGN_LEFT_SHIFT, // <<= + TOK_OP_ASSIGN_RIGHT_SHIFT, // >>= + + // Separators + TOK_SEP_LEFT_PAREN, // ( + TOK_SEP_RIGHT_PAREN, // ) + TOK_SEP_LEFT_BRACKET, // [ + TOK_SEP_RIGHT_BRACKET, // ] + TOK_SEP_LEFT_BRACE, // { + TOK_SEP_RIGHT_BRACE, // } + TOK_SEP_COMMA, // , + TOK_SEP_SEMICOLON, // ; + TOK_SEP_COLON, // : + TOK_SEP_DOT, // . + TOK_SEP_ELLIPSIS, // ... + TOK_SEP_HASH, // # + + // Identifiers + TOK_ID, // Any identifier not ending in _t + TOK_TID, // Any identifier ending in _t + + // Constants + TOK_CONST_INTEGER_U32, // u + TOK_CONST_INTEGER_U64, // ul + TOK_CONST_INTEGER_S32, // (no suffix) + TOK_CONST_INTEGER_S64, // l + TOK_CONST_FLOAT_32, // f + TOK_CONST_FLOAT_64, // (no suffix) + TOK_CONST_CHAR, // 'c' + TOK_CONST_STRING_ASCII, // "string" (width of 8 bits) + + // Special + TOK_SPECIAL_EOF, + TOK_SPECIAL_ERROR, +} c_token_types; + +void tokenizer_init(const char *filename); +token_t *tokenizer_get(void); +void tokenizer_unget(token_t *token); +void tokenizer_destroy(void); + +c_token_types token_type(token_t *token); +int64_t token_int(token_t *token); +double token_float(token_t *token); +const char *token_string(token_t *token); +char token_char(token_t *token); +int token_line(token_t *token); +int token_column(token_t *token); +void token_destroy(token_t *token); + +#endif+ \ No newline at end of file diff --git a/projects/cminus/lexer.html b/projects/cminus/lexer.html @@ -20,6 +20,13 @@ <img src="/images/line.gif" alt="decorative line"> </div> </header> + <noscript> + <div style="background-color: #ffcccc; padding: 10px; text-align: center;"> + <p><img src="/images/netscapenow.gif" alt="Netscape badge" width="88" height="31"> + This site might look pretty old, but it still uses JS for syntax highlighting. If you'd like to see code + examples that don't look monochrome, please enable JS. or don't. I'm not your dad.</p> + </div> + </noscript> <main style="padding: 20px;"> <h2>General Project Structure</h2> <p>Since this is the first article, I'll outline the project structure for the C- interpreter.</p> @@ -80,6 +87,9 @@ data structures which make the algorithms as simple as possible. </p> <p>For example, instead of writing 50-100 lines of code to hold every keyword in the language, I can just use a simple hash table. </p> + <h4>Misc</h4> + <p>The code on the blog is not the full code. I'm leaving out stuff like header guards and includes for brevity. The + full code is available on the <a href="git.reagancfischer.dev/cminus.git">git repo</a>.</p> <h2>The Lexer</h2> <p>A lexical analyzer reads source code and produces tokens, which are the smallest unit of meaning in a language. For example, in the C programming language, the tokens are things like keywords (if, else, while, etc.), @@ -244,9 +254,10 @@ c_token_types token_type(token_t *token); int64_t token_int(token_t *token); double token_float(token_t *token); const char *token_string(token_t *token); +char token_char(token_t *token); int token_line(token_t *token); int token_column(token_t *token); -const char *token_file(token_t *token); +void token_destroy(token_t *token); </code></pre> </div> <p>I'll also define an opaque struct for tokens and an enum for token types. 
</p> @@ -403,7 +414,7 @@ int main() { struct</a></em></strong></span> <pre class="prettyprint"><code class=""> #define TOK_MAGIC_1 0x544F4B454E544F4B // "TOKENTOK" -#define TOK_MAGIC_2 0x544F4B544F4B454E // "TOKTOKENT" +#define TOK_MAGIC_2 0x544F4B544F4B454E // "TOKTOKEN" struct token { long magic; int line; @@ -411,6 +422,9 @@ struct token { short kind; char opt_data[0]; }; + +typedef struct token token_t; + struct token_data { union { long long i; @@ -419,16 +433,18 @@ struct token_data { char c; } data; }; + +typedef struct token_data token_data_t; </code></pre> </div> <p>By keeping the token data off of simple tokens (like keywords and operators), we save 8 bytes per token. This will add up quickly, as the lexer will produce a lot of tokens. </p> - <p>Now that we have the token struct, we can implement some internal utilities for working with tokens </p> + <p>Now that we have the token struct, we can implement some internal utilities for working with tokens. </p> <div class="code-block"> <span class="file-name">tokenizer.c</span> <span class="block-header"> - <strong class="block-title"><em><a id=":token-api" href="#:token-api">Token - API</a></em></strong></span> + <strong class="block-title"><em><a id=":token-internals" href="#:token-internals">Token + Internals</a></em></strong></span> <pre class="prettyprint"><code class="">#define token_data(token) ((struct token_data *)((token)-&gt;opt_data)) static token_t *token_data_create(c_token_types kind, int line, int column) { token_t *token = malloc(sizeof(token_t) + sizeof(struct token_data)); @@ -449,13 +465,13 @@ static token_t *token_create(c_token_types kind, int line, int column) { } static token_t *token_create_int(c_token_types kind, int line, int column, int64_t i) { - token_t *token = token_create(kind, line, column); + token_t *token = token_data_create(kind, line, column); token_data(token)-&gt;data.i = i; return token; } static token_t *token_create_float(c_token_types kind, int line, int column, double f) { - token_t *token = token_create(kind, line, column); + token_t *token = token_data_create(kind, line, column); token_data(token)-&gt;data.f = f; return token; } @@ -465,18 +481,469 @@ static token_t *token_create_string(c_token_types kind, int line, int column, co } static token_t *token_create_char(c_token_types kind, int line, int column, char c) { - token_t *token = token_create(kind, line, column); + token_t *token = token_data_create(kind, line, column); + token_data(token)-&gt;data.c = c; + return token; +} +</code></pre> + </div> + <p>You might wonder why token_create_string was left out. Before you ask that, try counting up all the repeated + strings in the code so far. Look at printf, for example. Let's say it's called 20 seperate times throughout the + code. that's <math>20 * strlen("printf") = 140</math> bytes for the strings alone. That's a lot of duplicated + memory.</p> + <p>If you've got some CS under your belt, you're probably screaming "hash table" at your screen right now. And + you're + right. I'll be implementing a basic hash table local to the lexer to keep track of strings in memory.</p> + <h4>Hash tables</h4> + <p>If you're not familiar with hash tables, the basic idea is that you have a function that takes a key and returns + a number. You then use that number to index into an array. The function calculates a (hopefully) unique value for + each input, which means every value has its own unique array index. this means you can get an O(1) lookup for any + value in the table. 
However, we don't have enough memory and hash functions aren't good enough for this ideal + case, so we'll get collisions. I'll be using a linked hash table, which means that each array entry is a linked + list consisting of the data and the next item in the chain. </p> + <p>I'm sure I'll want to use a hash table again in the future, and not necessarily for strings or with strcmp as the + key. So I'll make the hash table generic and a separate module. </p> + <div class="code-block"> + <span class="file-name">hash_table.h</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":hash-table-interface" href="#:hash-table-interface">Hash + table interface</a></em></strong></span> + <pre class="prettyprint"><code class="">typedef struct hash_table hash_table_t; +typedef struct hash_table_entry hash_table_entry_t; +typedef int (*hash_table_cmp_fn)(void *key1, void *key2); +typedef unsigned long (*hash_table_hash_fn)(void *key); +hash_table_t *hash_table_create(int size, hash_table_cmp_fn cmp, hash_table_hash_fn hash); +void hash_table_destroy(hash_table_t *table); +void *hash_table_get(hash_table_t *table, void *key); +int hash_table_put(hash_table_t *table, void *key, void *value, int replace); +void hash_table_remove(hash_table_t *table, void *key); +</code></pre> + </div> + <p>Now that we have the hash table interface, we can implement the generic hash table. </p> + <div class="code-block"> + <span class="file-name">hash_table.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":string-hash-table" href="#:string-hash-table">String + hash table</a></em></strong></span> + <pre class="prettyprint"><code class="">struct hash_table { + hash_table_entry_t **entries; + int size; + hash_table_cmp_fn cmp; + hash_table_hash_fn hash; +}; +struct hash_table_entry { + void *key; + void *value; + hash_table_entry_t *next; +}; + +hash_table_t *hash_table_create(int size, hash_table_cmp_fn cmp, hash_table_hash_fn hash) { + hash_table_t *table = malloc(sizeof(hash_table_t)); + if (table == NULL) { + return NULL; + } + table-&gt;entries = calloc(size, sizeof(hash_table_entry_t *)); + if (table-&gt;entries == NULL) { + free(table); + return NULL; + } + table-&gt;size = size; + table-&gt;cmp = cmp; + table-&gt;hash = hash; + return table; +} + +void hash_table_destroy(hash_table_t *table) { + for (int i = 0; i &lt; table-&gt;size; i++) { + hash_table_entry_t *entry = table-&gt;entries[i]; + while (entry != NULL) { + hash_table_entry_t *next = entry-&gt;next; + free(entry); + entry = next; + } + } + free(table-&gt;entries); + free(table); +} + +void *hash_table_get(hash_table_t *table, void *key) { + unsigned long hash = table-&gt;hash(key) % table-&gt;size; + hash_table_entry_t *entry = table-&gt;entries[hash]; + while (entry != NULL) { + if (table-&gt;cmp(entry-&gt;key, key) == 0) { + return entry-&gt;value; + } + entry = entry-&gt;next; + } + return NULL; +} + +int hash_table_put(hash_table_t *table, void *key, void *value, int replace) { + unsigned long hash = table-&gt;hash(key) % table-&gt;size; + hash_table_entry_t *entry = table-&gt;entries[hash]; + while (entry != NULL) { + if (table-&gt;cmp(entry-&gt;key, key) == 0) { + if (replace) { + entry-&gt;value = value; + return 0; + } else { + return 1; + } + } + entry = entry-&gt;next; + } + entry = malloc(sizeof(hash_table_entry_t)); + entry-&gt;key = key; + entry-&gt;value = value; + entry-&gt;next = table-&gt;entries[hash]; + table-&gt;entries[hash] = entry; + return 0; +} + +void 
hash_table_remove(hash_table_t *table, void *key) { + unsigned long hash = table-&gt;hash(key) % table-&gt;size; + hash_table_entry_t *entry = table-&gt;entries[hash]; + hash_table_entry_t *prev = NULL; + while (entry != NULL) { + if (table-&gt;cmp(entry-&gt;key, key) == 0) { + if (prev == NULL) { + table-&gt;entries[hash] = entry-&gt;next; + } else { + prev-&gt;next = entry-&gt;next; + } + free(entry); + return; + } + prev = entry; + entry = entry-&gt;next; + } +} +</code></pre> + </div> + <p>One implementation detail of note is that hash_table_put returns 1 if the key already exists in the table, and, + unless replace is set to 1, will not replace the value. </p> + <p>This is useful as our token_create_string function can just check if the string already exists in the table and + free the memory if it does. </p> + <h4>String hashing</h4> + <p>Now we can implement the string hash table for the tokenizer.</p> + <p>A lot of research has been done on hash functions (See <a href="https://www.strchr.com/hash_functions">here for a + good review of theory and comparison of various functions</a>). For this project, I'll be using the hash + function from the famous "Red Dragon Book" (Aho, Sethi, Ullman, Compilers: Principles, Techniques, and Tools, 1st + edition). as it's optimized for compiler symbols. </p> + <div class="code-block"> + <span class="file-name">tokenizer.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":string-hash-function" href="#:string-hash-function">String + hash function</a></em></strong></span> + <pre class="prettyprint"><code class="">#define PRIME 211 +static unsigned long hash_string(void *key) { + unsigned long hash = 0, g; + char *p = key; + while (*p) { + hash = (hash &lt;&lt; 4) + *p++; + if ((g = hash &amp; 0xf0000000) != 0) { + hash ^= g &gt;&gt; 24; + hash ^= g; + } + } + return hash; +} +</code></pre> + </div> + <p>We'll also need a comparison function for strings. </p> + <div class="code-block"> + <span class="file-name">tokenizer.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":string-compare-function" href="#:string-compare-function">String + compare function</a></em></strong></span> + <pre class="prettyprint"><code class="">static int cmp_string(void *key1, void *key2) { + return strcmp((char *)key1, (char *)key2); +} +</code></pre> + </div> + <p>Now that we have the hash table, we can implement the string creation function. </p> + <div class="code-block"> + <span class="file-name">tokenizer.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":string-creation" href="#:string-creation">String + creation</a></em></strong></span> + <pre class="prettyprint"><code class="">static hash_table_t *string_table; +static token_t *token_create_string(c_token_types kind, int line, int column, const char *s) { + if (string_table == NULL) { + string_table = hash_table_create(1024, cmp_string, hash_string); + } + token_t *token = token_data_create(kind, line, column); + char *key = strdup(s); + if (hash_table_put(string_table, key, key, 0)) { + free(key); + key = hash_table_get(string_table, key); + } + token_data(token)-&gt;data.s = key; + return token; +} +</code></pre> + </div> + + <h4>External Token API</h4> + <p>Sticking with the theme of keeping data structures out of headers, the parser will have to access tokens using + functions defined in the tokenizer. We'll implement those now. 
</p> + <div class="code-block"> + <span class="file-name">tokenizer.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":external-token-api" href="#:external-token-api">External + token API</a></em></strong></span> + <pre class="prettyprint"><code class=""> +c_token_types token_type(token_t *token) { + assert(token-&gt;magic == TOK_MAGIC_1 || token-&gt;magic == TOK_MAGIC_2); + return token-&gt;kind; +} + +int64_t token_int(token_t *token) { + assert(token-&gt;kind == TOK_CONST_INTEGER_U32 || + token-&gt;kind == TOK_CONST_INTEGER_U64 || + token-&gt;kind == TOK_CONST_INTEGER_S32 || + token-&gt;kind == TOK_CONST_INTEGER_S64); + assert(token-&gt;magic == TOK_MAGIC_1); + return token_data(token)-&gt;data.i; +} + +double token_float(token_t *token) { + assert(token-&gt;kind == TOK_CONST_FLOAT_32 || + token-&gt;kind == TOK_CONST_FLOAT_64); + assert(token-&gt;magic == TOK_MAGIC_1); + return token_data(token)-&gt;data.f; +} + +const char *token_string(token_t *token) { + assert(token-&gt;kind == TOK_CONST_STRING_ASCII); + assert(token-&gt;magic == TOK_MAGIC_1); + return token_data(token)-&gt;data.s; +} + +char token_char(token_t *token) { + assert(token-&gt;kind == TOK_CONST_CHAR); + assert(token-&gt;magic == TOK_MAGIC_1); + return token_data(token)-&gt;data.c; +} + +int token_line(token_t *token) { + assert(token-&gt;magic == TOK_MAGIC_1 || token-&gt;magic == TOK_MAGIC_2); + return token-&gt;line; +} + +int token_column(token_t *token) { + assert(token-&gt;magic == TOK_MAGIC_1 || token-&gt;magic == TOK_MAGIC_2); + return token-&gt;column; +} + +void token_destroy(token_t *token) { + assert(token-&gt;magic == TOK_MAGIC_1 || token-&gt;magic == TOK_MAGIC_2); + if (token-&gt;magic == TOK_MAGIC_1) { + if (token-&gt;kind == TOK_CONST_STRING_ASCII) { + free((void *)token_data(token)-&gt;data.s); + } + } + free(token); +}</code></pre> + </div> + <p>You might notice that this code uses a lot of asserts. I plan to do this for all functions which access internal + data structures to help reduce bugs. </p> + <h4>Tokenizer Implementation</h4> + <p>I'll implement the less interesting functions first. </p> + <div class="code-block"> + <span class="file-name">tokenizer.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":tokenizer-implementation" href="#:tokenizer-implementation">Tokenizer + implementation</a></em></strong></span> + <pre class="prettyprint"><code class="">token_t *unget_token = NULL; +void tokenizer_init(const char *filename) +{ + input_init(filename); +} + +void tokenizer_unget(token_t *token) +{ + assert(unget_token == NULL); + unget_token = token; +} + +void tokenizer_destroy(void) +{ + input_destroy(); + if (string_table != NULL) + { + hash_table_destroy(string_table); + } +}</code></pre> + </div> + <p>Now we can implement the meat of the tokenizer. The general structure is as follows: </p> + <ol> + <li>Read a character from the input stream. </li> + <li>If whitespace, skip it. </li> + <li>If a digit or decimal point, read a number. </li> + <li>If a letter, read an identifier or keyword. </li> + <li>If a quote, read a string. </li> + <li>If a single quote, read a char.</li> + <li>If not any of the above, read an operator or separator. </li> + <li>Return the token if it matches any of the above, otherwise return an error or EOF token. </li> + </ol> + <p>Each of the functions for reading a token will return a token or <code>NULL</code> if it's not the right type. + </p> + <p>Let's first write a function to skip whitespace and comments. 
The rule in C is that whitespace is ignored except + when it separates tokens, any character sequence starting with <code>//</code> is a comment until the end of the + line, and any character sequence starting with <code>/*</code> is a comment until the next <code>*/</code>. </p> + <div class="code-block"> + <span class="file-name">tokenizer.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":skip-whitespace" href="#:skip-whitespace">Skip + whitespace</a></em></strong></span> + <pre class="prettyprint"><code class="">static void skip_whitespace(void) { + int c; + while ((c = input_getc()) != EOF) { + if (isspace(c)) { + if (c == '\n') { + line++; + column = 1; + } else { + column++; + } + } else if (c == '/') { + c = input_getc(); + if (c == '/') { + while ((c = input_getc()) != EOF && c != '\n') { + column++; + } + if (c == EOF) { + return; + } + line++; + column = 1; + } else if (c == '*') { + while ((c = input_getc()) != EOF) { + if (c == '*') { + c = input_getc(); + if (c == '/') { + break; + } + } else if (c == '\n') { + line++; + column = 1; + } else { + column++; + } + } + if (c == EOF) { + return; + } + } else { + input_ungetc(c); + return; + } + } else { + input_ungetc(c); + return; + } + } +}</code></pre> + </div> + <p>Now we'll implement code to recognize keywords and identifiers. The rules for identifiers are that they must + start with a letter or underscore and can contain letters, digits, and underscores. </p> + <div class="code-block"> + <span class="file-name">tokenizer.c</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":read-identifier" href="#:read-identifier">Read + identifier</a></em></strong></span> + <pre class="prettyprint"><code class="">static token_t *read_identifier(void) { + int c; + char buf[1024]; + int i = 0; + c = input_getc(); + if (!isalpha(c) &amp;&amp; c != '_') { + input_ungetc(c); + return NULL; + } + buf[i++] = c; + while ((c = input_getc()) != EOF) { + if (!isalnum(c) &amp;&amp; c != '_') { + input_ungetc(c); + break; + } + buf[i++] = c; + } + buf[i] = '\0'; + + // Check if it's a keyword + c_token_types kind = get_keyword(buf); + + if (kind == TOK_ID) { + // Check if it's a typedeffed identifier + int len = strlen(buf); + if (len &gt; 2 &amp;&amp; buf[len - 2] == '_' &amp;&amp; buf[len - 1] == 't') { + kind = TOK_TID; + } + } + return token_create_string(kind, line, column, buf); +}</code></pre> + </div> + <p>For recognizing keywords, I'll use a decision tree. This is a simple and space-efficient way to recognize + keywords. Originally I wanted to use a trie, but those end up using a lot of memory for a small number of + items. 
</p> + <p> To generate the decision tree, I'll use a simple C program which works off the following template: </p> + <div class="code-block"> + <span class="file-name">N/A</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":keyword-decision-tree" href="#:keyword-decision-tree">Keyword + decision tree</a></em></strong></span> + <pre class="prettyprint"><code class="">c_token_types get_keyword(const char *buf) { + switch (buf[0]) { + case [first letter]: + switch (buf[1]) { + case [second letter]: + switch (buf[2]) { + case [third letter]: + return [keyword]; + default: + return TOK_ID; + } + default: + return TOK_ID; + } + default: + return TOK_ID; + } +}</code></pre> + </div> + <p>For example, if we just fed the program the keyword "if", it would output the following: </p> + <div class="code-block"> + <span class="file-name">N/A</span> + <span class="block-header"> + <strong class="block-title"><em><a id=":if-keyword" href="#:if-keyword">If + keyword</a></em></strong></span> + <pre class="prettyprint"><code class="">c_token_types get_keyword(const char *buf) { + switch (buf[0]) { + case 'i': + switch (buf[1]) { + case 'f': + switch (buf[2]) { + case '\0': + return TOK_CTK_IF; + default: + return TOK_ID; + } + default: + return TOK_ID; + } + default: + return TOK_ID; + } } </code></pre> -</div> -<p>Now for the actual API</p> -<div class="code-block"> - <span class="file-name">tokenizer.c</span> - <span class="block-header"> - <strong class="block-title"><em><a id=":token-api" href="#:token-api">Token - API</a></em></strong></span> - <pre class="prettyprint"><code class="">void tokenizer_init(const char *filename) { - + </div> + <p>The program will be placed into the main tokenizer.c file using the HIGH QUALITY TECHNIQUE of including a c file + in another c file. </p> + <p> Time to write the program. </p> + + </main> <footer style="text-align: center; padding: 20px;"> diff --git a/test b/test @@ -1 +0,0 @@ -a