token.c (10600B)
1 /* token.c */ 2 #include <stdlib.h> 3 #include <string.h> 4 #include <stdio.h> 5 #include <assert.h> 6 #include <ctype.h> 7 #include "token.h" 8 #include "hash_table.h" 9 #include "util.h" 10 /* Token Data Structure */ 11 #define TOK_MAGIC_1 0x544F4B454E544F4Bul // "TOKENTOK" 12 #define TOK_MAGIC_2 0x544F4B544F4B454Eul // "TOKTOKEN" 13 14 struct token { 15 long magic; 16 int line; 17 int column; 18 short kind; 19 long opt_data[0]; 20 }; 21 22 typedef struct token token_t; 23 24 struct token_data { 25 union { 26 int64_t i; 27 double f; 28 const char *s; 29 char c; 30 } data; 31 }; 32 33 typedef struct token_data token_data_t; 34 int column = 1; 35 int line = 1; 36 37 /* Token Data Access */ 38 #define token_data(token) ((struct token_data *)((token)->opt_data)) 39 40 c_token_types token_type(token_t *token) { 41 assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); 42 return token->kind; 43 } 44 45 int64_t token_int(token_t *token) { 46 assert(token->kind == TOK_INTEGER_U32 || token->kind == TOK_INTEGER_U64 || token->kind == TOK_INTEGER_S32 || token->kind == TOK_INTEGER_S64); 47 assert(token->magic == TOK_MAGIC_1); 48 return token_data(token)->data.i; 49 } 50 51 double token_float(token_t *token) { 52 assert(token->kind == TOK_FLOAT_32 || token->kind == TOK_FLOAT_64); 53 assert(token->magic == TOK_MAGIC_1); 54 return token_data(token)->data.f; 55 } 56 57 const char *token_string(token_t *token) { 58 assert(token->kind == TOK_STRING_ASCII || token->kind == TOK_ID || token->kind == TOK_TYPEDEF_NAME); 59 assert(token->magic == TOK_MAGIC_1); 60 return token_data(token)->data.s; 61 } 62 63 char token_char(token_t *token) { 64 assert(token->kind == TOK_CHAR_CONST); 65 assert(token->magic == TOK_MAGIC_1); 66 return token_data(token)->data.c; 67 } 68 69 int token_line(token_t *token) { 70 assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); 71 return token->line; 72 } 73 74 int token_column(token_t *token) { 75 assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); 76 return token->column; 77 } 78 79 /* Token Creation and Destruction */ 80 token_t *token_data_create(c_token_types kind, int lin, int col, int len) { 81 token_t *token = malloc(sizeof(token_t) + sizeof(struct token_data)); 82 if (!token) { 83 fputs("Out of memory\n", stderr); 84 exit(1); 85 } 86 token->magic = TOK_MAGIC_1; 87 token->line = lin; 88 token->column = col; 89 column += len; 90 token->kind = kind; 91 return token; 92 } 93 94 token_t *token_create(c_token_types kind, int lin, int col, int len) { 95 token_t *token = malloc(sizeof(token_t)); 96 if (!token) { 97 fputs("Out of memory\n", stderr); 98 exit(1); 99 } 100 token->magic = TOK_MAGIC_2; 101 token->line = lin; 102 token->column = col; 103 column += len; 104 token->kind = kind; 105 return token; 106 } 107 108 token_t *token_create_int(c_token_types kind, int lin, int col, int64_t i, int len) { 109 token_t *token = token_data_create(kind, lin, col, len); 110 token_data(token)->data.i = i; 111 return token; 112 } 113 114 token_t *token_create_float(c_token_types kind, int lin, int col, double f, int len) { 115 token_t *token = token_data_create(kind, lin, col, len); 116 token_data(token)->data.f = f; 117 return token; 118 } 119 120 token_t *token_create_char(c_token_types kind, int lin, int col, char c, int len) { 121 token_t *token = token_data_create(kind, lin, col, len); 122 token_data(token)->data.c = c; 123 return token; 124 } 125 126 void token_destroy(token_t *token) { 127 if (token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2) { 128 free(token); 129 } else { 130 fputs("Corrupt token\n", stderr); 131 exit(1); 132 } 133 } 134 135 /* Token Create String */ 136 hash_table_t *string_table; 137 token_t *token_create_string(c_token_types kind, int lin, int col, 138 const char *s, int len) { 139 if (string_table == NULL) { 140 string_table = hash_table_create(2048, cmp_string, hash_string, dtor_string); 141 } 142 token_t *token = token_data_create(kind, lin, col, len); 143 char *key = hash_table_get(string_table, (void *)s); 144 if (key == NULL) { 145 key = strdup(s); 146 hash_table_put(string_table, key, key); 147 } 148 token_data(token)->data.s = key; 149 return token; 150 } 151 152 /* Token Debugging */ 153 /* Token Type Enum to String */ 154 const char *token_name_from_type(c_token_types type) { 155 switch (type) { 156 case TOK_IF: 157 return "TOK_IF"; 158 case TOK_ELSE: 159 return "TOK_ELSE"; 160 case TOK_SWITCH: 161 return "TOK_SWITCH"; 162 case TOK_CASE: 163 return "TOK_CASE"; 164 case TOK_DEFAULT: 165 return "TOK_DEFAULT"; 166 case TOK_WHILE: 167 return "TOK_WHILE"; 168 case TOK_DO: 169 return "TOK_DO"; 170 case TOK_FOR: 171 return "TOK_FOR"; 172 case TOK_CONTINUE: 173 return "TOK_CONTINUE"; 174 case TOK_BREAK: 175 return "TOK_BREAK"; 176 case TOK_RETURN: 177 return "TOK_RETURN"; 178 case TOK_GOTO: 179 return "TOK_GOTO"; 180 case TOK_VOID: 181 return "TOK_VOID"; 182 case TOK_CHAR: 183 return "TOK_CHAR"; 184 case TOK_SHORT: 185 return "TOK_SHORT"; 186 case TOK_INT: 187 return "TOK_INT"; 188 case TOK_LONG: 189 return "TOK_LONG"; 190 case TOK_FLOAT: 191 return "TOK_FLOAT"; 192 case TOK_DOUBLE: 193 return "TOK_DOUBLE"; 194 case TOK_SIGNED: 195 return "TOK_SIGNED"; 196 case TOK_UNSIGNED: 197 return "TOK_UNSIGNED"; 198 case TOK_STRUCT: 199 return "TOK_STRUCT"; 200 case TOK_UNION: 201 return "TOK_UNION"; 202 case TOK_ENUM: 203 return "TOK_ENUM"; 204 case TOK_TYPEDEF: 205 return "TOK_TYPEDEF"; 206 case TOK_AUTO: 207 return "TOK_AUTO"; 208 case TOK_REGISTER: 209 return "TOK_REGISTER"; 210 case TOK_STATIC: 211 return "TOK_STATIC"; 212 case TOK_EXTERN: 213 return "TOK_EXTERN"; 214 case TOK_CONST: 215 return "TOK_CONST"; 216 case TOK_VOLATILE: 217 return "TOK_VOLATILE"; 218 case TOK_SIZEOF: 219 return "TOK_SIZEOF"; 220 case TOK_ADD: 221 return "TOK_ADD"; 222 case TOK_SUB: 223 return "TOK_SUB"; 224 case TOK_MUL: 225 return "TOK_MUL"; 226 case TOK_DIV: 227 return "TOK_DIV"; 228 case TOK_MOD: 229 return "TOK_MOD"; 230 case TOK_BIT_AND: 231 return "TOK_BIT_AND"; 232 case TOK_BIT_OR: 233 return "TOK_BIT_OR"; 234 case TOK_BIT_XOR: 235 return "TOK_BIT_XOR"; 236 case TOK_BIT_NOT: 237 return "TOK_BIT_NOT"; 238 case TOK_LSHIFT: 239 return "TOK_LSHIFT"; 240 case TOK_RSHIFT: 241 return "TOK_RSHIFT"; 242 case TOK_NOT: 243 return "TOK_NOT"; 244 case TOK_ASSIGN: 245 return "TOK_ASSIGN"; 246 case TOK_LT: 247 return "TOK_LT"; 248 case TOK_GT: 249 return "TOK_GT"; 250 case TOK_INC: 251 return "TOK_INC"; 252 case TOK_DEC: 253 return "TOK_DEC"; 254 case TOK_EQ: 255 return "TOK_EQ"; 256 case TOK_NE: 257 return "TOK_NE"; 258 case TOK_LE: 259 return "TOK_LE"; 260 case TOK_GE: 261 return "TOK_GE"; 262 case TOK_AND: 263 return "TOK_AND"; 264 case TOK_OR: 265 return "TOK_OR"; 266 case TOK_MEMBER_POINTER: 267 return "TOK_MEMBER_POINTER"; 268 case TOK_MEMBER: 269 return "TOK_MEMBER"; 270 case TOK_COND_DECISION: 271 return "TOK_COND_DECISION"; 272 case TOK_COND: 273 return "TOK_COND"; 274 case TOK_ASSIGN_ADD: 275 return "TOK_ASSIGN_ADD"; 276 case TOK_ASSIGN_SUB: 277 return "TOK_ASSIGN_SUB"; 278 case TOK_ASSIGN_MUL: 279 return "TOK_ASSIGN_MUL"; 280 case TOK_ASSIGN_DIV: 281 return "TOK_ASSIGN_DIV"; 282 case TOK_ASSIGN_MOD: 283 return "TOK_ASSIGN_MOD"; 284 case TOK_ASSIGN_BITAND: 285 return "TOK_ASSIGN_BITAND"; 286 case TOK_ASSIGN_BITOR: 287 return "TOK_ASSIGN_BITOR"; 288 case TOK_ASSIGN_BITXOR: 289 return "TOK_ASSIGN_BITXOR"; 290 case TOK_ASSIGN_LSHIFT: 291 return "TOK_ASSIGN_LSHIFT"; 292 case TOK_ASSIGN_RSHIFT: 293 return "TOK_ASSIGN_RSHIFT"; 294 case TOK_HASH: 295 return "TOK_HASH"; 296 case TOK_ID: 297 return "TOK_ID"; 298 case TOK_TYPEDEF_NAME: 299 return "TOK_TYPEDEF_NAME"; 300 case TOK_INTEGER_U32: 301 return "TOK_INTEGER_U32"; 302 case TOK_INTEGER_U64: 303 return "TOK_INTEGER_U64"; 304 case TOK_INTEGER_S32: 305 return "TOK_INTEGER_S32"; 306 case TOK_INTEGER_S64: 307 return "TOK_INTEGER_S64"; 308 case TOK_FLOAT_32: 309 return "TOK_FLOAT_32"; 310 case TOK_FLOAT_64: 311 return "TOK_FLOAT_64"; 312 case TOK_CHAR_CONST: 313 return "TOK_CHAR_CONST"; 314 case TOK_STRING_ASCII: 315 return "TOK_STRING_ASCII"; 316 case TOK_EOF: 317 return "TOK_EOF"; 318 case TOK_ERROR: 319 return "TOK_ERROR"; 320 case TOK_LEFT_PAREN: 321 return "TOK_LEFT_PAREN"; 322 case TOK_RIGHT_PAREN: 323 return "TOK_RIGHT_PAREN"; 324 case TOK_LEFT_BRACKET: 325 return "TOK_LEFT_BRACKET"; 326 case TOK_RIGHT_BRACKET: 327 return "TOK_RIGHT_BRACKET"; 328 case TOK_LEFT_BRACE: 329 return "TOK_LEFT_BRACE"; 330 case TOK_RIGHT_BRACE: 331 return "TOK_RIGHT_BRACE"; 332 case TOK_COMMA: 333 return "TOK_COMMA"; 334 case TOK_SEMICOLON: 335 return "TOK_SEMICOLON"; 336 case TOK_DOT: 337 return "TOK_DOT"; 338 case TOK_ELLIPSIS: 339 return "TOK_ELLIPSIS"; 340 } 341 return "UNKNOWN"; 342 } 343 344 /* Unescape String */ 345 #define clamp(x, min, max) ((x) < (min) ? (min) : (x) > (max) ? (max) : (x)) 346 char *re_escape_string(const char *str) { 347 int len = strlen(str); 348 char *buf = malloc(len * 2 + 1); 349 if (!buf) { 350 fprintf(stderr, "Out of memory. Cannot escape string\n"); 351 exit(1); 352 } 353 int i = 0; 354 for (int j = 0; j < len; j++) { 355 switch (str[j]) { 356 case '\a': buf[i++] = '\\'; buf[i++] = 'a'; break; 357 case '\b': buf[i++] = '\\'; buf[i++] = 'b'; break; 358 case '\f': buf[i++] = '\\'; buf[i++] = 'f'; break; 359 case '\n': buf[i++] = '\\'; buf[i++] = 'n'; break; 360 case '\r': buf[i++] = '\\'; buf[i++] = 'r'; break; 361 case '\t': buf[i++] = '\\'; buf[i++] = 't'; break; 362 case '\v': buf[i++] = '\\'; buf[i++] = 'v'; break; 363 case '\\': buf[i++] = '\\'; buf[i++] = '\\'; break; 364 case '\'': buf[i++] = '\\'; buf[i++] = '\''; break; 365 case '"': buf[i++] = '\\'; buf[i++] = '"'; break; 366 default: { 367 if (isprint(str[j])) { 368 buf[i++] = str[j]; 369 } else { 370 buf[i++] = '\\'; 371 buf[i++] = 'x'; 372 buf[i++] = "0123456789abcdef"[clamp(str[j] >> 4, 0, 0xf)]; 373 buf[i++] = "0123456789abcdef"[clamp(str[j] & 0xf, 0, 0xf)]; 374 } 375 } 376 } 377 } 378 buf[i] = '\0'; 379 return buf; 380 } 381 382 /* Print Token */ 383 void print_token(token_t *tok) { 384 if (!tok) { 385 printf("NULL\n"); 386 return; 387 } 388 const char *name = token_name_from_type(tok->kind); 389 switch (tok->kind) { 390 case TOK_ID: 391 case TOK_STRING_ASCII: { 392 char *escaped = re_escape_string(token_string(tok)); 393 printf("%s: \"%s\"@%d:%d\n", name, escaped, tok->line, tok->column); 394 free(escaped); 395 break; 396 } 397 case TOK_TYPEDEF_NAME: { 398 char *escaped = re_escape_string(token_string(tok)); 399 printf("%s: %s@%d:%d\n", name, escaped, tok->line, tok->column); 400 free(escaped); 401 break; 402 } 403 case TOK_CHAR_CONST: 404 printf("%s: '%c'@%d:%d\n", name, token_char(tok), tok->line, tok->column); 405 break; 406 case TOK_INTEGER_S32: 407 case TOK_INTEGER_U32: 408 case TOK_INTEGER_S64: 409 case TOK_INTEGER_U64: 410 printf("%s: %ld@%d:%d\n", name, token_int(tok), tok->line, tok->column); 411 break; 412 case TOK_FLOAT_32: 413 case TOK_FLOAT_64: 414 printf("%s: %f@%d:%d\n", name, token_float(tok), tok->line, tok->column); 415 break; 416 default: 417 printf("%s@%d:%d\n", name, tok->line, tok->column); 418 break; 419 } 420 } 421 422 423