website

Website contents
git clone git://git.reagancfischer.dev/website.git
Log | Files | Refs

token.c (10600B)


      1 /* token.c */
      2 #include <stdlib.h>
      3 #include <string.h>
      4 #include <stdio.h>
      5 #include <assert.h>
      6 #include <ctype.h>
      7 #include "token.h"
      8 #include "hash_table.h"
      9 #include "util.h"
     10 /* Token Data Structure */
     11 #define TOK_MAGIC_1 0x544F4B454E544F4Bul // "TOKENTOK"
     12 #define TOK_MAGIC_2 0x544F4B544F4B454Eul // "TOKTOKEN"
     13 
     14 struct token {
     15   long magic;
     16   int line;
     17   int column;
     18   short kind;
     19   long opt_data[0];
     20 };
     21 
     22 typedef struct token token_t;
     23 
     24 struct token_data {
     25   union {
     26     int64_t i;
     27     double f;
     28     const char *s;
     29     char c;
     30   } data;
     31 };
     32 
     33 typedef struct token_data token_data_t;
/* Running column position, starting at 1. token_create() and
 * token_data_create() add each new token's length to it. */
int column = 1;
/* Line counter, starting at 1. Nothing visible in this file writes to it —
 * presumably the lexer maintains it; verify against callers. */
int line = 1;
     36 
     37 /* Token Data Access */
     38 #define token_data(token) ((struct token_data *)((token)->opt_data))
     39 
     40 c_token_types token_type(token_t *token) {
     41   assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2);
     42   return token->kind;
     43 }
     44 
     45 int64_t token_int(token_t *token) {
     46   assert(token->kind == TOK_INTEGER_U32 || token->kind == TOK_INTEGER_U64 || token->kind == TOK_INTEGER_S32 || token->kind == TOK_INTEGER_S64);
     47   assert(token->magic == TOK_MAGIC_1);
     48   return token_data(token)->data.i;
     49 }
     50 
     51 double token_float(token_t *token) {
     52   assert(token->kind == TOK_FLOAT_32 || token->kind == TOK_FLOAT_64);
     53   assert(token->magic == TOK_MAGIC_1);
     54   return token_data(token)->data.f;
     55 }
     56 
     57 const char *token_string(token_t *token) {
     58   assert(token->kind == TOK_STRING_ASCII || token->kind == TOK_ID || token->kind == TOK_TYPEDEF_NAME);
     59   assert(token->magic == TOK_MAGIC_1);
     60   return token_data(token)->data.s;
     61 }
     62 
     63 char token_char(token_t *token) {
     64   assert(token->kind == TOK_CHAR_CONST);
     65   assert(token->magic == TOK_MAGIC_1);
     66   return token_data(token)->data.c;
     67 }
     68 
     69 int token_line(token_t *token) {
     70   assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2);
     71   return token->line;
     72 }
     73 
     74 int token_column(token_t *token) {
     75   assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2);
     76   return token->column;
     77 }
     78 
     79 /* Token Creation and Destruction */
     80 token_t *token_data_create(c_token_types kind, int lin, int col, int len) {
     81   token_t *token = malloc(sizeof(token_t) + sizeof(struct token_data));
     82   if (!token) {
     83     fputs("Out of memory\n", stderr);
     84     exit(1);
     85   }
     86   token->magic = TOK_MAGIC_1;
     87   token->line = lin;
     88   token->column = col;
     89   column += len;
     90   token->kind = kind;
     91   return token;
     92 }
     93 
     94 token_t *token_create(c_token_types kind, int lin, int col, int len) {
     95   token_t *token = malloc(sizeof(token_t));
     96   if (!token) {
     97     fputs("Out of memory\n", stderr);
     98     exit(1);
     99   }
    100   token->magic = TOK_MAGIC_2;
    101   token->line = lin;
    102   token->column = col;
    103   column += len;
    104   token->kind = kind;
    105   return token;
    106 }
    107 
    108 token_t *token_create_int(c_token_types kind, int lin, int col, int64_t i, int len) {
    109   token_t *token = token_data_create(kind, lin, col, len);
    110   token_data(token)->data.i = i;
    111   return token;
    112 }
    113 
    114 token_t *token_create_float(c_token_types kind, int lin, int col, double f, int len) {
    115   token_t *token = token_data_create(kind, lin, col, len);
    116   token_data(token)->data.f = f;
    117   return token;
    118 }
    119 
    120 token_t *token_create_char(c_token_types kind, int lin, int col, char c, int len) {
    121   token_t *token = token_data_create(kind, lin, col, len);
    122   token_data(token)->data.c = c;
    123   return token;
    124 }
    125 
    126 void token_destroy(token_t *token) {
    127   if (token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2) {
    128     free(token);
    129   } else {
    130     fputs("Corrupt token\n", stderr);
    131     exit(1);
    132   }
    133 }
    134 
    135 /* Token Create String */
    136 hash_table_t *string_table;
    137 token_t *token_create_string(c_token_types kind, int lin, int col,
    138                                     const char *s, int len) {
    139   if (string_table == NULL) {
    140     string_table = hash_table_create(2048, cmp_string, hash_string, dtor_string);
    141   }
    142   token_t *token = token_data_create(kind, lin, col, len);
    143   char *key = hash_table_get(string_table, (void *)s);
    144   if (key == NULL) {
    145     key = strdup(s);
    146     hash_table_put(string_table, key, key);
    147   }
    148   token_data(token)->data.s = key;
    149   return token;
    150 }
    151 
    152 /* Token Debugging */
    153 /* Token Type Enum to String */
    154 const char *token_name_from_type(c_token_types type) {
    155   switch (type) {
    156   case TOK_IF:
    157     return "TOK_IF";
    158   case TOK_ELSE:
    159     return "TOK_ELSE";
    160   case TOK_SWITCH:
    161     return "TOK_SWITCH";
    162   case TOK_CASE:
    163     return "TOK_CASE";
    164   case TOK_DEFAULT:
    165     return "TOK_DEFAULT";
    166   case TOK_WHILE:
    167     return "TOK_WHILE";
    168   case TOK_DO:
    169     return "TOK_DO";
    170   case TOK_FOR:
    171     return "TOK_FOR";
    172   case TOK_CONTINUE:
    173     return "TOK_CONTINUE";
    174   case TOK_BREAK:
    175     return "TOK_BREAK";
    176   case TOK_RETURN:
    177     return "TOK_RETURN";
    178   case TOK_GOTO:
    179     return "TOK_GOTO";
    180   case TOK_VOID:
    181     return "TOK_VOID";
    182   case TOK_CHAR:
    183     return "TOK_CHAR";
    184   case TOK_SHORT:
    185     return "TOK_SHORT";
    186   case TOK_INT:
    187     return "TOK_INT";
    188   case TOK_LONG:
    189     return "TOK_LONG";
    190   case TOK_FLOAT:
    191     return "TOK_FLOAT";
    192   case TOK_DOUBLE:
    193     return "TOK_DOUBLE";
    194   case TOK_SIGNED:
    195     return "TOK_SIGNED";
    196   case TOK_UNSIGNED:
    197     return "TOK_UNSIGNED";
    198   case TOK_STRUCT:
    199     return "TOK_STRUCT";
    200   case TOK_UNION:
    201     return "TOK_UNION";
    202   case TOK_ENUM:
    203     return "TOK_ENUM";
    204   case TOK_TYPEDEF:
    205     return "TOK_TYPEDEF";
    206   case TOK_AUTO:
    207     return "TOK_AUTO";
    208   case TOK_REGISTER:
    209     return "TOK_REGISTER";
    210   case TOK_STATIC:
    211     return "TOK_STATIC";
    212   case TOK_EXTERN:
    213     return "TOK_EXTERN";
    214   case TOK_CONST:
    215     return "TOK_CONST";
    216   case TOK_VOLATILE:
    217     return "TOK_VOLATILE";
    218   case TOK_SIZEOF:
    219     return "TOK_SIZEOF";
    220   case TOK_ADD:
    221     return "TOK_ADD";
    222   case TOK_SUB:
    223     return "TOK_SUB";
    224   case TOK_MUL:
    225     return "TOK_MUL";
    226   case TOK_DIV:
    227     return "TOK_DIV";
    228   case TOK_MOD:
    229     return "TOK_MOD";
    230   case TOK_BIT_AND:
    231     return "TOK_BIT_AND";
    232   case TOK_BIT_OR:
    233     return "TOK_BIT_OR";
    234   case TOK_BIT_XOR:
    235     return "TOK_BIT_XOR";
    236   case TOK_BIT_NOT:
    237     return "TOK_BIT_NOT";
    238   case TOK_LSHIFT:
    239     return "TOK_LSHIFT";
    240   case TOK_RSHIFT:
    241     return "TOK_RSHIFT";
    242   case TOK_NOT:
    243     return "TOK_NOT";
    244   case TOK_ASSIGN:
    245     return "TOK_ASSIGN";
    246   case TOK_LT:
    247     return "TOK_LT";
    248   case TOK_GT:
    249     return "TOK_GT";
    250   case TOK_INC:
    251     return "TOK_INC";
    252   case TOK_DEC:
    253     return "TOK_DEC";
    254   case TOK_EQ:
    255     return "TOK_EQ";
    256   case TOK_NE:
    257     return "TOK_NE";
    258   case TOK_LE:
    259     return "TOK_LE";
    260   case TOK_GE:
    261     return "TOK_GE";
    262   case TOK_AND:
    263     return "TOK_AND";
    264   case TOK_OR:
    265     return "TOK_OR";
    266   case TOK_MEMBER_POINTER:
    267     return "TOK_MEMBER_POINTER";
    268   case TOK_MEMBER:
    269     return "TOK_MEMBER";
    270   case TOK_COND_DECISION:
    271     return "TOK_COND_DECISION";
    272   case TOK_COND:
    273     return "TOK_COND";
    274   case TOK_ASSIGN_ADD:
    275     return "TOK_ASSIGN_ADD";
    276   case TOK_ASSIGN_SUB:
    277     return "TOK_ASSIGN_SUB";
    278   case TOK_ASSIGN_MUL:
    279     return "TOK_ASSIGN_MUL";
    280   case TOK_ASSIGN_DIV:
    281     return "TOK_ASSIGN_DIV";
    282   case TOK_ASSIGN_MOD:
    283     return "TOK_ASSIGN_MOD";
    284   case TOK_ASSIGN_BITAND:
    285     return "TOK_ASSIGN_BITAND";
    286   case TOK_ASSIGN_BITOR:
    287     return "TOK_ASSIGN_BITOR";
    288   case TOK_ASSIGN_BITXOR:
    289     return "TOK_ASSIGN_BITXOR";
    290   case TOK_ASSIGN_LSHIFT:
    291     return "TOK_ASSIGN_LSHIFT";
    292   case TOK_ASSIGN_RSHIFT:
    293     return "TOK_ASSIGN_RSHIFT";
    294   case TOK_HASH:
    295     return "TOK_HASH";
    296   case TOK_ID:
    297     return "TOK_ID";
    298   case TOK_TYPEDEF_NAME:
    299     return "TOK_TYPEDEF_NAME";
    300   case TOK_INTEGER_U32:
    301     return "TOK_INTEGER_U32";
    302   case TOK_INTEGER_U64:
    303     return "TOK_INTEGER_U64";
    304   case TOK_INTEGER_S32:
    305     return "TOK_INTEGER_S32";
    306   case TOK_INTEGER_S64:
    307     return "TOK_INTEGER_S64";
    308   case TOK_FLOAT_32:
    309     return "TOK_FLOAT_32";
    310   case TOK_FLOAT_64:
    311     return "TOK_FLOAT_64";
    312   case TOK_CHAR_CONST:
    313     return "TOK_CHAR_CONST";
    314   case TOK_STRING_ASCII:
    315     return "TOK_STRING_ASCII";
    316   case TOK_EOF:
    317     return "TOK_EOF";
    318   case TOK_ERROR:
    319     return "TOK_ERROR";
    320   case TOK_LEFT_PAREN:
    321     return "TOK_LEFT_PAREN";
    322   case TOK_RIGHT_PAREN:
    323     return "TOK_RIGHT_PAREN";
    324   case TOK_LEFT_BRACKET:
    325     return "TOK_LEFT_BRACKET";
    326   case TOK_RIGHT_BRACKET:
    327     return "TOK_RIGHT_BRACKET";
    328   case TOK_LEFT_BRACE:
    329     return "TOK_LEFT_BRACE";
    330   case TOK_RIGHT_BRACE:
    331     return "TOK_RIGHT_BRACE";
    332   case TOK_COMMA:
    333     return "TOK_COMMA";
    334   case TOK_SEMICOLON:
    335     return "TOK_SEMICOLON";
    336   case TOK_DOT:
    337     return "TOK_DOT";
    338   case TOK_ELLIPSIS:
    339     return "TOK_ELLIPSIS";
    340   }
    341   return "UNKNOWN";
    342 }
    343 
    344 /* Unescape String */
    345 #define clamp(x, min, max) ((x) < (min) ? (min) : (x) > (max) ? (max) : (x))
    346 char *re_escape_string(const char *str) {
    347   int len = strlen(str);
    348   char *buf = malloc(len * 2 + 1);
    349   if (!buf) {
    350     fprintf(stderr, "Out of memory. Cannot escape string\n");
    351     exit(1);
    352   }
    353   int i = 0;
    354   for (int j = 0; j < len; j++) {
    355     switch (str[j]) {
    356     case '\a': buf[i++] = '\\'; buf[i++] = 'a'; break;
    357     case '\b': buf[i++] = '\\'; buf[i++] = 'b'; break;
    358     case '\f': buf[i++] = '\\'; buf[i++] = 'f'; break;
    359     case '\n': buf[i++] = '\\'; buf[i++] = 'n'; break;
    360     case '\r': buf[i++] = '\\'; buf[i++] = 'r'; break;
    361     case '\t': buf[i++] = '\\'; buf[i++] = 't'; break;
    362     case '\v': buf[i++] = '\\'; buf[i++] = 'v'; break;
    363     case '\\': buf[i++] = '\\'; buf[i++] = '\\'; break;
    364     case '\'': buf[i++] = '\\'; buf[i++] = '\''; break;
    365     case '"': buf[i++] = '\\'; buf[i++] = '"'; break;
    366     default: {
    367       if (isprint(str[j])) {
    368         buf[i++] = str[j];
    369       } else {
    370         buf[i++] = '\\';
    371         buf[i++] = 'x';
    372         buf[i++] = "0123456789abcdef"[clamp(str[j] >> 4, 0, 0xf)];
    373         buf[i++] = "0123456789abcdef"[clamp(str[j] & 0xf, 0, 0xf)];
    374       }
    375     }
    376     }
    377   }
    378   buf[i] = '\0';
    379   return buf;
    380 }
    381 
    382 /* Print Token */
    383 void print_token(token_t *tok) {
    384   if (!tok) {
    385     printf("NULL\n");
    386     return;
    387   }
    388   const char *name = token_name_from_type(tok->kind);
    389   switch (tok->kind) {
    390   case TOK_ID:
    391   case TOK_STRING_ASCII: {
    392     char *escaped = re_escape_string(token_string(tok));
    393     printf("%s: \"%s\"@%d:%d\n", name, escaped, tok->line, tok->column);
    394     free(escaped);
    395     break;
    396   }
    397   case TOK_TYPEDEF_NAME: {
    398     char *escaped = re_escape_string(token_string(tok));
    399     printf("%s: %s@%d:%d\n", name, escaped, tok->line, tok->column);
    400     free(escaped);
    401     break;
    402   }
    403   case TOK_CHAR_CONST:
    404     printf("%s: '%c'@%d:%d\n", name, token_char(tok), tok->line, tok->column);
    405     break;
    406   case TOK_INTEGER_S32:
    407   case TOK_INTEGER_U32:
    408   case TOK_INTEGER_S64:
    409   case TOK_INTEGER_U64:
    410     printf("%s: %ld@%d:%d\n", name, token_int(tok), tok->line, tok->column);
    411     break;
    412   case TOK_FLOAT_32:
    413   case TOK_FLOAT_64:
    414     printf("%s: %f@%d:%d\n", name, token_float(tok), tok->line, tok->column);
    415     break;
    416   default:
    417     printf("%s@%d:%d\n", name, tok->line, tok->column);
    418     break;
    419   }
    420 }
    421 
    422 
    423