commit 8439e14a4356442171bad9768ce804cbb038a33b
parent 225bd0f59e642de4dd0330a12746eeda56286da6
Author: Reagan <rfische2@uccs.edu>
Date: Thu, 22 Aug 2024 11:43:01 -0600
lexer new
Diffstat:
6 files changed, 494 insertions(+), 121 deletions(-)
diff --git a/index.html b/index.html
@@ -58,8 +58,8 @@
</script>
<h2>Hello and welcome to my website!</h2>
- <p>Hi. My name is Reagan Fischer. I'm a Computer Science and Mathematics major at UCCS with interests in
- embedded systems engineering, compilers, and operating systems. I'm creating this website to collate my
+ <p>Hi! My name is Reagan Fischer. I'm a Computer Science and Mathematics major at UCCS with interests in
+ embedded systems engineering, compilers, and operating systems. I've created this website to collate my
projects and share my code. I hope you find something you enjoy!</p>
</main>
diff --git a/projects.html b/projects.html
@@ -90,7 +90,7 @@
</p>
<ol>
<li>
- <a href="projects/cminus/lexer.html">Step 1: Lexing</a>
+ <a href="projects/cminus/lexer_new.html">Step 1: Lexing</a>
</li>
</ol>
<h2 id="net" name="net">Mock Networks</h2>
diff --git a/projects/cminus/lexer.lit b/projects/cminus/lexer.lit
@@ -1,4 +0,0 @@
-@code_type c .c
-@comment_type /* %s */
-@compiler lit -t lexer.lit && gcc -Wall -Wextra -Wpedantic -Wstrict-aliasing=3 -Wwrite-strings -Wvla -Wcast-align=strict -Wstrict-prototypes -Wstringop-overflow=4 -Wshadow -fanalyzer tokenizer.c input.c hash_table.c -D TOK_TEST -g -O0 && rm a.out
-
diff --git a/projects/cminus/lexer_new.html b/projects/cminus/lexer_new.html
@@ -0,0 +1,288 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Lexer</title>
+<script>
+/* Vendored, minified google-code-prettify bundle. Exposes window.PR, window.prettyPrint and window.prettyPrintOne; this page runs prettyPrint() from the body onload handler, which scans pre/code/xmp elements and highlights those marked for pretty-printing (e.g. class "prettyprint"). Minified output: replace it with a fresh upstream build rather than hand-editing. */!function(){var q=null;window.PR_SHOULD_USE_CONTINUATION=!0;
+(function(){function R(a){function d(e){var b=e.charCodeAt(0);if(b!==92)return b;var a=e.charAt(1);return(b=r[a])?b:"0"<=a&&a<="7"?parseInt(e.substring(1),8):a==="u"||a==="x"?parseInt(e.substring(2),16):e.charCodeAt(1)}function g(e){if(e<32)return(e<16?"\\x0":"\\x")+e.toString(16);e=String.fromCharCode(e);return e==="\\"||e==="-"||e==="]"||e==="^"?"\\"+e:e}function b(e){var b=e.substring(1,e.length-1).match(/\\u[\dA-Fa-f]{4}|\\x[\dA-Fa-f]{2}|\\[0-3][0-7]{0,2}|\\[0-7]{1,2}|\\[\S\s]|[^\\]/g),e=[],a=
+b[0]==="^",c=["["];a&&c.push("^");for(var a=a?1:0,f=b.length;a<f;++a){var h=b[a];if(/\\[bdsw]/i.test(h))c.push(h);else{var h=d(h),l;a+2<f&&"-"===b[a+1]?(l=d(b[a+2]),a+=2):l=h;e.push([h,l]);l<65||h>122||(l<65||h>90||e.push([Math.max(65,h)|32,Math.min(l,90)|32]),l<97||h>122||e.push([Math.max(97,h)&-33,Math.min(l,122)&-33]))}}e.sort(function(e,a){return e[0]-a[0]||a[1]-e[1]});b=[];f=[];for(a=0;a<e.length;++a)h=e[a],h[0]<=f[1]+1?f[1]=Math.max(f[1],h[1]):b.push(f=h);for(a=0;a<b.length;++a)h=b[a],c.push(g(h[0])),
+h[1]>h[0]&&(h[1]+1>h[0]&&c.push("-"),c.push(g(h[1])));c.push("]");return c.join("")}function s(e){for(var a=e.source.match(/\[(?:[^\\\] ]|\\[\S\s])*]|\\u[\dA-Fa-f]{4}|\\x[\dA-Fa-f]{2}|\\\d+|\\[^\dux]|\(\?[!:=]|[()^]|[^()[\\^]+/g),c=a.length,d=[],f=0,h=0;f<c;++f){var l=a[f];l==="("?++h:"\\"===l.charAt(0)&&(l=+l.substring(1))&&(l<=h?d[l]=-1:a[f]=g(l))}for(f=1;f<d.length;++f)-1===d[f]&&(d[f]=++x);for(h=f=0;f<c;++f)l=a[f],l==="("?(++h,d[h]||(a[f]="(?:")):"\\"===l.charAt(0)&&(l=+l.substring(1))&&l<=h&&
+(a[f]="\\"+d[l]);for(f=0;f<c;++f)"^"===a[f]&&"^"!==a[f+1]&&(a[f]="");if(e.ignoreCase&&m)for(f=0;f<c;++f)l=a[f],e=l.charAt(0),l.length>=2&&e==="["?a[f]=b(l):e!=="\\"&&(a[f]=l.replace(/[A-Za-z]/g,function(a){a=a.charCodeAt(0);return"["+String.fromCharCode(a&-33,a|32)+"]"}));return a.join("")}for(var x=0,m=!1,j=!1,k=0,c=a.length;k<c;++k){var i=a[k];if(i.ignoreCase)j=!0;else if(/[a-z]/i.test(i.source.replace(/\\u[\da-f]{4}|\\x[\da-f]{2}|\\[^UXux]/gi,""))){m=!0;j=!1;break}}for(var r={b:8,t:9,n:10,v:11,
+f:12,r:13},n=[],k=0,c=a.length;k<c;++k){i=a[k];if(i.global||i.multiline)throw Error(""+i);n.push("(?:"+s(i)+")")}return RegExp(n.join("|"),j?"gi":"g")}function S(a,d){function g(a){var c=a.nodeType;if(c==1){if(!b.test(a.className)){for(c=a.firstChild;c;c=c.nextSibling)g(c);c=a.nodeName.toLowerCase();if("br"===c||"li"===c)s[j]="\n",m[j<<1]=x++,m[j++<<1|1]=a}}else if(c==3||c==4)c=a.nodeValue,c.length&&(c=d?c.replace(/\r\n?/g,"\n"):c.replace(/[\t\n\r ]+/g," "),s[j]=c,m[j<<1]=x,x+=c.length,m[j++<<1|1]=
+a)}var b=/(?:^|\s)nocode(?:\s|$)/,s=[],x=0,m=[],j=0;g(a);return{a:s.join("").replace(/\n$/,""),d:m}}function H(a,d,g,b){d&&(a={a:d,e:a},g(a),b.push.apply(b,a.g))}function T(a){for(var d=void 0,g=a.firstChild;g;g=g.nextSibling)var b=g.nodeType,d=b===1?d?a:g:b===3?U.test(g.nodeValue)?a:d:d;return d===a?void 0:d}function D(a,d){function g(a){for(var j=a.e,k=[j,"pln"],c=0,i=a.a.match(s)||[],r={},n=0,e=i.length;n<e;++n){var z=i[n],w=r[z],t=void 0,f;if(typeof w==="string")f=!1;else{var h=b[z.charAt(0)];
+if(h)t=z.match(h[1]),w=h[0];else{for(f=0;f<x;++f)if(h=d[f],t=z.match(h[1])){w=h[0];break}t||(w="pln")}if((f=w.length>=5&&"lang-"===w.substring(0,5))&&!(t&&typeof t[1]==="string"))f=!1,w="src";f||(r[z]=w)}h=c;c+=z.length;if(f){f=t[1];var l=z.indexOf(f),B=l+f.length;t[2]&&(B=z.length-t[2].length,l=B-f.length);w=w.substring(5);H(j+h,z.substring(0,l),g,k);H(j+h+l,f,I(w,f),k);H(j+h+B,z.substring(B),g,k)}else k.push(j+h,w)}a.g=k}var b={},s;(function(){for(var g=a.concat(d),j=[],k={},c=0,i=g.length;c<i;++c){var r=
+g[c],n=r[3];if(n)for(var e=n.length;--e>=0;)b[n.charAt(e)]=r;r=r[1];n=""+r;k.hasOwnProperty(n)||(j.push(r),k[n]=q)}j.push(/[\S\s]/);s=R(j)})();var x=d.length;return g}function v(a){var d=[],g=[];a.tripleQuotedStrings?d.push(["str",/^(?:'''(?:[^'\\]|\\[\S\s]|''?(?=[^']))*(?:'''|$)|"""(?:[^"\\]|\\[\S\s]|""?(?=[^"]))*(?:"""|$)|'(?:[^'\\]|\\[\S\s])*(?:'|$)|"(?:[^"\\]|\\[\S\s])*(?:"|$))/,q,"'\""]):a.multiLineStrings?d.push(["str",/^(?:'(?:[^'\\]|\\[\S\s])*(?:'|$)|"(?:[^"\\]|\\[\S\s])*(?:"|$)|`(?:[^\\`]|\\[\S\s])*(?:`|$))/,
+q,"'\"`"]):d.push(["str",/^(?:'(?:[^\n\r'\\]|\\.)*(?:'|$)|"(?:[^\n\r"\\]|\\.)*(?:"|$))/,q,"\"'"]);a.verbatimStrings&&g.push(["str",/^@"(?:[^"]|"")*(?:"|$)/,q]);var b=a.hashComments;b&&(a.cStyleComments?(b>1?d.push(["com",/^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/,q,"#"]):d.push(["com",/^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\n\r]*)/,q,"#"]),g.push(["str",/^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,q])):d.push(["com",
+/^#[^\n\r]*/,q,"#"]));a.cStyleComments&&(g.push(["com",/^\/\/[^\n\r]*/,q]),g.push(["com",/^\/\*[\S\s]*?(?:\*\/|$)/,q]));if(b=a.regexLiterals){var s=(b=b>1?"":"\n\r")?".":"[\\S\\s]";g.push(["lang-regex",RegExp("^(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*("+("/(?=[^/*"+b+"])(?:[^/\\x5B\\x5C"+b+"]|\\x5C"+s+"|\\x5B(?:[^\\x5C\\x5D"+b+"]|\\x5C"+
+s+")*(?:\\x5D|$))+/")+")")])}(b=a.types)&&g.push(["typ",b]);b=(""+a.keywords).replace(/^ | $/g,"");b.length&&g.push(["kwd",RegExp("^(?:"+b.replace(/[\s,]+/g,"|")+")\\b"),q]);d.push(["pln",/^\s+/,q," \r\n\t\u00a0"]);b="^.[^\\s\\w.$@'\"`/\\\\]*";a.regexLiterals&&(b+="(?!s*/)");g.push(["lit",/^@[$_a-z][\w$@]*/i,q],["typ",/^(?:[@_]?[A-Z]+[a-z][\w$@]*|\w+_t\b)/,q],["pln",/^[$_a-z][\w$@]*/i,q],["lit",/^(?:0x[\da-f]+|(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d\+)(?:e[+-]?\d+)?)[a-z]*/i,q,"0123456789"],["pln",/^\\[\S\s]?/,
+q],["pun",RegExp(b),q]);return D(d,g)}function J(a,d,g){function b(a){var c=a.nodeType;if(c==1&&!x.test(a.className))if("br"===a.nodeName)s(a),a.parentNode&&a.parentNode.removeChild(a);else for(a=a.firstChild;a;a=a.nextSibling)b(a);else if((c==3||c==4)&&g){var d=a.nodeValue,i=d.match(m);if(i)c=d.substring(0,i.index),a.nodeValue=c,(d=d.substring(i.index+i[0].length))&&a.parentNode.insertBefore(j.createTextNode(d),a.nextSibling),s(a),c||a.parentNode.removeChild(a)}}function s(a){function b(a,c){var d=
+c?a.cloneNode(!1):a,e=a.parentNode;if(e){var e=b(e,1),g=a.nextSibling;e.appendChild(d);for(var i=g;i;i=g)g=i.nextSibling,e.appendChild(i)}return d}for(;!a.nextSibling;)if(a=a.parentNode,!a)return;for(var a=b(a.nextSibling,0),d;(d=a.parentNode)&&d.nodeType===1;)a=d;c.push(a)}for(var x=/(?:^|\s)nocode(?:\s|$)/,m=/\r\n?|\n/,j=a.ownerDocument,k=j.createElement("li");a.firstChild;)k.appendChild(a.firstChild);for(var c=[k],i=0;i<c.length;++i)b(c[i]);d===(d|0)&&c[0].setAttribute("value",d);var r=j.createElement("ol");
+r.className="linenums";for(var d=Math.max(0,d-1|0)||0,i=0,n=c.length;i<n;++i)k=c[i],k.className="L"+(i+d)%10,k.firstChild||k.appendChild(j.createTextNode("\u00a0")),r.appendChild(k);a.appendChild(r)}function p(a,d){for(var g=d.length;--g>=0;){var b=d[g];F.hasOwnProperty(b)?E.console&&console.warn("cannot override language handler %s",b):F[b]=a}}function I(a,d){if(!a||!F.hasOwnProperty(a))a=/^\s*</.test(d)?"default-markup":"default-code";return F[a]}function K(a){var d=a.h;try{var g=S(a.c,a.i),b=g.a;
+a.a=b;a.d=g.d;a.e=0;I(d,b)(a);var s=/\bMSIE\s(\d+)/.exec(navigator.userAgent),s=s&&+s[1]<=8,d=/\n/g,x=a.a,m=x.length,g=0,j=a.d,k=j.length,b=0,c=a.g,i=c.length,r=0;c[i]=m;var n,e;for(e=n=0;e<i;)c[e]!==c[e+2]?(c[n++]=c[e++],c[n++]=c[e++]):e+=2;i=n;for(e=n=0;e<i;){for(var p=c[e],w=c[e+1],t=e+2;t+2<=i&&c[t+1]===w;)t+=2;c[n++]=p;c[n++]=w;e=t}c.length=n;var f=a.c,h;if(f)h=f.style.display,f.style.display="none";try{for(;b<k;){var l=j[b+2]||m,B=c[r+2]||m,t=Math.min(l,B),A=j[b+1],G;if(A.nodeType!==1&&(G=x.substring(g,
+t))){s&&(G=G.replace(d,"\r"));A.nodeValue=G;var L=A.ownerDocument,o=L.createElement("span");o.className=c[r+1];var v=A.parentNode;v.replaceChild(o,A);o.appendChild(A);g<l&&(j[b+1]=A=L.createTextNode(x.substring(t,l)),v.insertBefore(A,o.nextSibling))}g=t;g>=l&&(b+=2);g>=B&&(r+=2)}}finally{if(f)f.style.display=h}}catch(u){E.console&&console.log(u&&u.stack||u)}}var E=window,y=["break,continue,do,else,for,if,return,while"],C=[[y,"auto,case,char,const,default,double,enum,extern,float,goto,inline,int,long,register,short,signed,sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"],
+"catch,class,delete,false,import,new,operator,private,protected,public,this,throw,true,try,typeof"],M=[C,"alignof,align_union,asm,axiom,bool,concept,concept_map,const_cast,constexpr,decltype,delegate,dynamic_cast,explicit,export,friend,generic,late_check,mutable,namespace,nullptr,property,reinterpret_cast,static_assert,static_cast,template,typeid,typename,using,virtual,where"],V=[C,"abstract,assert,boolean,byte,extends,final,finally,implements,import,instanceof,interface,null,native,package,strictfp,super,synchronized,throws,transient"],
+N=[C,"abstract,as,base,bool,by,byte,checked,decimal,delegate,descending,dynamic,event,finally,fixed,foreach,from,group,implicit,in,interface,internal,into,is,let,lock,null,object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var,virtual,where"],C=[C,"debugger,eval,export,function,get,null,set,undefined,var,with,Infinity,NaN"],O=[y,"and,as,assert,class,def,del,elif,except,exec,finally,from,global,import,in,is,lambda,nonlocal,not,or,pass,print,raise,try,with,yield,False,True,None"],
+P=[y,"alias,and,begin,case,class,def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo,rescue,retry,self,super,then,true,undef,unless,until,when,yield,BEGIN,END"],W=[y,"as,assert,const,copy,drop,enum,extern,fail,false,fn,impl,let,log,loop,match,mod,move,mut,priv,pub,pure,ref,self,static,struct,true,trait,type,unsafe,use"],y=[y,"case,done,elif,esac,eval,fi,function,in,local,set,then,until"],Q=/^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)\b/,
+U=/\S/,X=v({keywords:[M,N,C,"caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END",O,P,y],hashComments:!0,cStyleComments:!0,multiLineStrings:!0,regexLiterals:!0}),F={};p(X,["default-code"]);p(D([],[["pln",/^[^<?]+/],["dec",/^<!\w[^>]*(?:>|$)/],["com",/^<\!--[\S\s]*?(?:--\>|$)/],["lang-",/^<\?([\S\s]+?)(?:\?>|$)/],["lang-",/^<%([\S\s]+?)(?:%>|$)/],["pun",/^(?:<[%?]|[%?]>)/],["lang-",
+/^<xmp\b[^>]*>([\S\s]+?)<\/xmp\b[^>]*>/i],["lang-js",/^<script\b[^>]*>([\S\s]*?)(<\/script\b[^>]*>)/i],["lang-css",/^<style\b[^>]*>([\S\s]*?)(<\/style\b[^>]*>)/i],["lang-in.tag",/^(<\/?[a-z][^<>]*>)/i] ]),["default-markup","htm","html","mxml","xhtml","xml","xsl"]);p(D([["pln",/^\s+/,q," \t\r\n"],["atv",/^(?:"[^"]*"?|'[^']*'?)/,q,"\"'"] ],[["tag",/^^<\/?[a-z](?:[\w-.:]*\w)?|\/?>$/i],["atn",/^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],["lang-uq.val",/^=\s*([^\s"'>]*(?:[^\s"'/>]|\/(?=\s)))/],["pun",/^[/<->]+/],
+["lang-js",/^on\w+\s*=\s*"([^"]+)"/i],["lang-js",/^on\w+\s*=\s*'([^']+)'/i],["lang-js",/^on\w+\s*=\s*([^\s"'>]+)/i],["lang-css",/^style\s*=\s*"([^"]+)"/i],["lang-css",/^style\s*=\s*'([^']+)'/i],["lang-css",/^style\s*=\s*([^\s"'>]+)/i] ]),["in.tag"]);p(D([],[["atv",/^[\S\s]+/] ]),["uq.val"]);p(v({keywords:M,hashComments:!0,cStyleComments:!0,types:Q}),["c","cc","cpp","cxx","cyc","m"]);p(v({keywords:"null,true,false"}),["json"]);p(v({keywords:N,hashComments:!0,cStyleComments:!0,verbatimStrings:!0,types:Q}),
+["cs"]);p(v({keywords:V,cStyleComments:!0}),["java"]);p(v({keywords:y,hashComments:!0,multiLineStrings:!0}),["bash","bsh","csh","sh"]);p(v({keywords:O,hashComments:!0,multiLineStrings:!0,tripleQuotedStrings:!0}),["cv","py","python"]);p(v({keywords:"caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import,last,local,my,next,no,our,print,package,redo,require,sub,undef,unless,until,use,wantarray,while,BEGIN,END",hashComments:!0,multiLineStrings:!0,regexLiterals:2}),["perl","pl","pm"]);p(v({keywords:P,
+hashComments:!0,multiLineStrings:!0,regexLiterals:!0}),["rb","ruby"]);p(v({keywords:C,cStyleComments:!0,regexLiterals:!0}),["javascript","js"]);p(v({keywords:"all,and,by,catch,class,else,extends,false,finally,for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then,throw,true,try,unless,until,when,while,yes",hashComments:3,cStyleComments:!0,multilineStrings:!0,tripleQuotedStrings:!0,regexLiterals:!0}),["coffee"]);p(v({keywords:W,cStyleComments:!0,multilineStrings:!0}),["rc","rs","rust"]);
+p(D([],[["str",/^[\S\s]+/] ]),["regex"]);var Y=E.PR={createSimpleLexer:D,registerLangHandler:p,sourceDecorator:v,PR_ATTRIB_NAME:"atn",PR_ATTRIB_VALUE:"atv",PR_COMMENT:"com",PR_DECLARATION:"dec",PR_KEYWORD:"kwd",PR_LITERAL:"lit",PR_NOCODE:"nocode",PR_PLAIN:"pln",PR_PUNCTUATION:"pun",PR_SOURCE:"src",PR_STRING:"str",PR_TAG:"tag",PR_TYPE:"typ",prettyPrintOne:E.prettyPrintOne=function(a,d,g){var b=document.createElement("div");b.innerHTML="<pre>"+a+"</pre>";b=b.firstChild;g&&J(b,g,!0);K({h:d,j:g,c:b,i:1});
+return b.innerHTML},prettyPrint:E.prettyPrint=function(a,d){function g(){for(var b=E.PR_SHOULD_USE_CONTINUATION?c.now()+250:Infinity;i<p.length&&c.now()<b;i++){for(var d=p[i],j=h,k=d;k=k.previousSibling;){var m=k.nodeType,o=(m===7||m===8)&&k.nodeValue;if(o?!/^\??prettify\b/.test(o):m!==3||/\S/.test(k.nodeValue))break;if(o){j={};o.replace(/\b(\w+)=([\w%+\-.:]+)/g,function(a,b,c){j[b]=c});break}}k=d.className;if((j!==h||e.test(k))&&!v.test(k)){m=!1;for(o=d.parentNode;o;o=o.parentNode)if(f.test(o.tagName)&&
+o.className&&e.test(o.className)){m=!0;break}if(!m){d.className+=" prettyprinted";m=j.lang;if(!m){var m=k.match(n),y;if(!m&&(y=T(d))&&t.test(y.tagName))m=y.className.match(n);m&&(m=m[1])}if(w.test(d.tagName))o=1;else var o=d.currentStyle,u=s.defaultView,o=(o=o?o.whiteSpace:u&&u.getComputedStyle?u.getComputedStyle(d,q).getPropertyValue("white-space"):0)&&"pre"===o.substring(0,3);u=j.linenums;if(!(u=u==="true"||+u))u=(u=k.match(/\blinenums\b(?::(\d+))?/))?u[1]&&u[1].length?+u[1]:!0:!1;u&&J(d,u,o);r=
+{h:m,c:d,j:u,i:o};K(r)}}}i<p.length?setTimeout(g,250):"function"===typeof a&&a()}for(var b=d||document.body,s=b.ownerDocument||document,b=[b.getElementsByTagName("pre"),b.getElementsByTagName("code"),b.getElementsByTagName("xmp")],p=[],m=0;m<b.length;++m)for(var j=0,k=b[m].length;j<k;++j)p.push(b[m][j]);var b=q,c=Date;c.now||(c={now:function(){return+new Date}});var i=0,r,n=/\blang(?:uage)?-([\w.]+)(?!\S)/,e=/\bprettyprint\b/,v=/\bprettyprinted\b/,w=/pre|xmp/i,t=/^code$/i,f=/^(?:pre|code|xmp)$/i,
+h={};g()}};typeof define==="function"&&define.amd&&define("google-code-prettify",[],function(){return Y})})();}()
+</script>
+<style>
+.pln{color:#1b181b}.str{color:#918b3b}.kwd{color:#7b59c0}.com{color:#9e8f9e}.typ{color:#516aec}.lit{color:#a65926}.clo,.opn,.pun{color:#1b181b}.tag{color:#ca402b}.atn{color:#a65926}.atv{color:#159393}.dec{color:#a65926}.var{color:#ca402b}.fun{color:#516aec}pre.prettyprint{background:#f7f3f7;color:#ab9bab;font-family:Menlo,Consolas,"Bitstream Vera Sans Mono","DejaVu Sans Mono",Monaco,monospace;font-size:12px;line-height:1.5;border:1px solid #d8cad8;padding:10px}ol.linenums{margin-top:0;margin-bottom:0}
+body{min-width:200px;max-width:850px;margin:0 auto;padding:30px;}.chapter-nav{font-size: 10pt;}a:link,a:visited{color:#00f}.codeblock_name,code,pre.prettyprint{font-family:Monaco,"Lucida Console",monospace}body{font-size:14pt}.codeblock_name,.math,.seealso,code{font-size:10pt}.codeblock{page-break-inside:avoid;padding-bottom:15px}.math{text-indent:0}pre.prettyprint{font-size:10pt;padding:10px;border-radius:10px;border:none;white-space:pre-wrap}.codeblock_name{margin-top:1.25em;display:block}a:link{text-decoration:none}a:link:not(.lit):hover{color:#00f;text-decoration:underline}a:link:active{color:red}h4{padding-right:1.25em}h4.noheading{margin-bottom:0}h1{text-align:center}code{padding:2px}pre{-moz-tab-size:4;-o-tab-size:4;tab-size:4}p:not(.notp){margin:0;text-indent:2em}.two-col{list-style-type:none}.two-col li:before{content:'-';padding:5px;margin-right:5px;color:orange;background-color:#fff;display:inline-block}@media print{body{font-size:10pt}pre.prettyprint{font-size:8pt}.seealso{font-size:9pt}.codeblock_name,.math,code{font-size:8pt}.math{text-indent:0}}
+/* code blocks (Style from jmeiners.com/lc3-vm, CC BY-NC-SA 4.0, used with attribution) */
+code,
+.block-header,
+.file-name
+ {
+ font-size: 11pt;
+ font-family: 'Fira Mono', Menlo, Monaco, Consolas, Liberation Mono, Courier New, monospace;
+}
+
+.file-name-hr
+{
+ font-size: 13pt;
+ font-family: 'Fira Mono', Menlo, Monaco, Consolas, Liberation Mono, Courier New, monospace;
+}
+
+/* Quotes and Block Quotes */
+blockquote {
+ margin: 1.5em 10px;
+ padding: 0.5em 10px;
+ border-left: 5px solid #ccc;
+ color: #666;
+ background-color: #f9f9f9;
+ font-style: italic;
+}
+
+blockquote p {
+ margin: 0;
+ font-size: 1.2em;
+}
+
+q {
+ quotes: "“" "”" "‘" "’";
+ font-style: italic;
+}
+
+q::before {
+ content: open-quote;
+}
+
+q::after {
+ content: close-quote;
+}
+
+/*! Color themes for Google Code Prettify | MIT License | github.com/jmblog/color-themes-for-google-code-prettify */
+.prettyprint {
+ background: #f5f7ff;
+ font-family: Menlo, "Bitstream Vera Sans Mono", "DejaVu Sans Mono", Monaco, Consolas, monospace;
+ border: 0 !important;
+}
+
+.pln {
+ color: #202746;
+}
+
+/* Specify class=linenums on a pre to get line numbering */
+ol.linenums {
+ margin-top: 0;
+ margin-bottom: 0;
+ color: #202746;
+}
+
+li.L0,
+li.L1,
+li.L2,
+li.L3,
+li.L4,
+li.L5,
+li.L6,
+li.L7,
+li.L8,
+li.L9 {
+ padding-left: 1em;
+ background-color: #f5f7ff;
+ list-style-type: decimal;
+}
+
+@media screen {
+
+ /* string content */
+
+ .str {
+ color: #ac9739;
+ }
+
+ /* keyword */
+
+ .kwd {
+ color: #6679cc;
+ }
+
+ /* comment */
+
+ .com {
+ color: #202746;
+ }
+
+ /* type name */
+
+ .typ {
+ color: #3d8fd1;
+ }
+
+ /* literal value */
+
+ .lit {
+ color: #c76b29;
+ }
+
+ /* punctuation */
+
+ .pun {
+ color: #202746;
+ }
+
+ /* lisp open bracket */
+
+ .opn {
+ color: #202746;
+ }
+
+ /* lisp close bracket */
+
+ .clo {
+ color: #202746;
+ }
+
+ /* markup tag name */
+
+ .tag {
+ color: #c94922;
+ }
+
+ /* markup attribute name */
+
+ .atn {
+ color: #c76b29;
+ }
+
+ /* markup attribute value */
+
+ .atv {
+ color: #22a2c9;
+ }
+
+ /* declaration */
+
+ .dec {
+ color: #c76b29;
+ }
+
+ /* variable name */
+
+ .var {
+ color: #c94922;
+ }
+
+ /* function name */
+
+ .fun {
+ color: #3d8fd1;
+ }
+}</style>
+</head>
+<body onload="prettyPrint()">
+<section>
+<h1>Lexer</h1>
+<div class="section"><a name="1:1"></a><h4>1. General Project Structure</h4>
+<p>Since this is the first article, I'll outline the project structure for the C- compiler.
+</p>
+<p>The project has a series of pretty typical stages:
+</p>
+<ol>
+<li>The lexer. This takes a file as input and emits a series of tokens. (Its input is already preprocessed; I outsource that to "gcc -E".)
+</li>
+<li>The parser. This takes the tokens and builds an abstract syntax tree (AST).
+</li>
+<li>The symbol table. This exists in a sort of in-between space next to the lexer and parser. It's used to store information about variables and functions.
+</li>
+<li>The type checker. This is used to ensure that the types of variables and functions are correct.
+</li>
+<li>The code generator. This takes the AST and generates an intermediate representation (IR).
+</li>
+<li>The optimizer. This takes the IR and optimizes it. This'll be broken up into a few stages.
+</li>
+<li>The lowerer. This takes the IR and lowers it to a simpler IR.
+</li>
+<li>The register allocator. This takes the IR, whose instructions may use an unlimited number of virtual registers, and maps those onto a finite number of physical registers.
+</li>
+<li>The code emitter. This takes the IR and emits RISC-V assembly.
+</li>
+</ol>
+<p>As far as possible, I'd like to keep each of these stages separate. One benefit of this is that it simplifies memory management greatly. I plan to use an arena allocator for each stage, and by making sure the only thing on the actual heap is the output of the stage, and all temporary data is stored in the arena, I can free all the memory used by a stage by simply freeing the arena.
+</p>
+<h2> Some Rules</h2>
+<p>Here are some rules (more like guidelines) that I plan to follow for this project; they're mostly just to keep things simple and consistent.
+</p>
+<h4> 1. PROGRAM LIKE IT'S 1999</h4>
+<blockquote><p> 640 KB ought to be enough for anybody. - Bill Gates
+</p>
+</blockquote>
+<p>Maybe not that little, but I'm going to try to keep the project as simple as possible. 640 KB probably won't be enough, but I'll still aim for less than 10 MB of memory usage.
+</p>
+<p>This places a lot of constraints on the project, but I think it's a good exercise in minimalism.
+</p>
+<p>Some consequences of this are that I'll have to use memory-efficient algorithms, be very careful about program structure, and avoid some of the bigger libraries (which will help with making this project self-hosting in the future).
+</p>
+<h4> 2. PROGRAM IN C++--</h4>
+<p>I'm not a big fan of C++, but its class system helps prevent a lot of ugly bugs. To that end, I'm going to try and keep data structures out of header files, and only expose functions that operate on those data structures, to create a sort of approximation of a class. This has a few benefits:
+</p>
+<ul>
+<li>Quicker compilation. A change to a data structure will only require one file to be recompiled, rather than every file that includes the header.
+</li>
+<li>Less chance of bugs. If a function is the only way to interact with a data structure, then it's much harder to misuse that data structure.
+</li>
+<li>Run-time type checking. I can include some sort of tag in the first field of every data structure to ensure that the correct functions are being called.
+</li>
+</ul>
+<h4> 3. DON'T GET FANCY</h4>
+<p>My goal here isn't to write the fastest compiler in the world, or the most complete. I just want to make something that works and can be understood by someone else.
+</p>
+<p>That means I'm going to avoid a lot of the tricks that are used in production compilers, and focus more on simplicity and readability.
+</p>
+<h4> 4. DESIGN FOR DEBUGGING</h4>
+<p>This code is going to be peppered with asserts and contain mechanisms to print out the state of the program at any point.
+</p>
+<p>This might be painful, but it'll make debugging a lot simpler and let users look under the hood.
+</p>
+<h4> 5. SMART DATA, STUPID CODE</h4>
+<p>A lot of times, the right data structure can replace 50-100 lines of procedural code. I'm going to try and design data structures which make the algorithms as simple as possible.
+</p>
+<p>For example, instead of writing 50-100 lines of code to hold every keyword in the language, I can just use a simple hash table.
+</p>
+<h4> Misc</h4>
+<p>THIS IS A LITERATE PROGRAM! Go to <a href="https://reagancfischer.dev/lexer.lit">this link</a> to see the file that generated this HTML.
+</p>
+<h2> The Lexer</h2>
+<p>A lexical analyzer reads source code and produces tokens, which are the smallest unit of meaning in a language. For example, in the C programming language, the tokens are things like keywords (if, else, while, etc.), identifiers (variable names), numbers, and punctuation (braces, semicolons, etc.).
+</p>
+<p>Given a string like <code>int main() { return 0; }</code>, the lexer would produce a series of tokens like <code>INT</code>, <code>IDENTIFIER(main)</code>, <code>LPAREN</code>, <code>RPAREN</code>, <code>LBRACE</code>, <code>RETURN</code>, <code>INTCONSTANT(0)</code>, <code>SEMICOLON</code>, <code>RBRACE</code>.
+</p>
+<h3> Design</h3>
+<p>I'll break the lexer up into two modules, <code>tokenizer.c</code> and <code>input.c</code>. The input module will be responsible for reading the file and providing characters to the tokenizer, while the tokenizer module will be responsible for producing tokens.
+</p>
+<h3> Input</h3>
+<h4> Input Interface</h4>
+
+</div></section>
+</body>
diff --git a/projects/cminus/lexer_new.lit b/projects/cminus/lexer_new.lit
@@ -0,0 +1,86 @@
+@code_type c .c
+@comment_type /* %s */
+@compiler lit -t lexer.lit && gcc -Wall -Wextra -Wpedantic -Wstrict-aliasing=3 -Wwrite-strings -Wvla -Wcast-align=strict -Wstrict-prototypes -Wstringop-overflow=4 -Wshadow -fanalyzer tokenizer.c input.c hash_table.c -D TOK_TEST -g -O0 && rm a.out
+
+@title Lexer
+@add_css ../style.css
+@s General Project Structure
+Since this is the first article, I'll outline the project structure for the C- compiler.
+
+The project has a series of pretty typical stages:
+
+1. The lexer. This takes a file as input and emits a series of tokens. (Its input is already preprocessed; I outsource that to "gcc -E".)
+2. The parser. This takes the tokens and builds an abstract syntax tree (AST).
+3. The symbol table. This exists in a sort of in-between space next to the lexer and parser. It's used to store information about variables and functions.
+4. The type checker. This is used to ensure that the types of variables and functions are correct.
+5. The code generator. This takes the AST and generates an intermediate representation (IR).
+6. The optimizer. This takes the IR and optimizes it. This'll be broken up into a few stages.
+7. The lowerer. This takes the IR and lowers it to a simpler IR.
+8. The register allocator. This takes the IR, whose instructions may use an unlimited number of virtual registers, and maps those onto a finite number of physical registers.
+9. The code emitter. This takes the IR and emits RISC-V assembly.
+
+As far as possible, I'd like to keep each of these stages separate. One benefit of this is that it simplifies memory management greatly. I plan to use an arena allocator for each stage, and by making sure the only thing on the actual heap is the output of the stage, and all temporary data is stored in the arena, I can free all the memory used by a stage by simply freeing the arena.
+
+## Some Rules
+
+Here are some rules (more like guidelines) that I plan to follow for this project; they're mostly just to keep things simple and consistent.
+
+#### 1\. PROGRAM LIKE IT'S 1999
+
+> 640 KB ought to be enough for anybody. - Bill Gates
+
+Maybe not that little, but I'm going to try to keep the project as simple as possible. 640 KB probably won't be enough, but I'll still aim for less than 10 MB of memory usage.
+
+This places a lot of constraints on the project, but I think it's a good exercise in minimalism.
+
+Some consequences of this are that I'll have to use memory-efficient algorithms, be very careful about program structure, and avoid some of the bigger libraries (which will help with making this project self-hosting in the future).
+
+#### 2\. PROGRAM IN C++--
+
+I'm not a big fan of C++, but its class system helps prevent a lot of ugly bugs. To that end, I'm going to try and keep data structures out of header files, and only expose functions that operate on those data structures, to create a sort of approximation of a class. This has a few benefits:
+
+* Quicker compilation. A change to a data structure will only require one file to be recompiled, rather than every file that includes the header.
+* Less chance of bugs. If a function is the only way to interact with a data structure, then it's much harder to misuse that data structure.
+* Run-time type checking. I can include some sort of tag in the first field of every data structure to ensure that the correct functions are being called.
+
+#### 3\. DON'T GET FANCY
+
+My goal here isn't to write the fastest compiler in the world, or the most complete. I just want to make something that works and can be understood by someone else.
+
+That means I'm going to avoid a lot of the tricks that are used in production compilers, and focus more on simplicity and readability.
+
+#### 4\. DESIGN FOR DEBUGGING
+
+This code is going to be peppered with asserts and contain mechanisms to print out the state of the program at any point.
+
+This might be painful, but it'll make debugging a lot simpler and let users look under the hood.
+
+#### 5\. SMART DATA, STUPID CODE
+
+A lot of times, the right data structure can replace 50-100 lines of procedural code. I'm going to try and design data structures which make the algorithms as simple as possible.
+
+For example, instead of writing 50-100 lines of code to hold every keyword in the language, I can just use a simple hash table.
+
+#### Misc
+THIS IS A LITERATE PROGRAM! Go to [this link](https://reagancfischer.dev/lexer.lit) to see the file that generated this HTML.
+
+## The Lexer
+
+A lexical analyzer reads source code and produces tokens, which are the smallest unit of meaning in a language. For example, in the C programming language, the tokens are things like keywords (if, else, while, etc.), identifiers (variable names), numbers, and punctuation (braces, semicolons, etc.).
+
+Given a string like `int main() { return 0; }`, the lexer would produce a series of tokens like `INT`, `IDENTIFIER(main)`, `LPAREN`, `RPAREN`, `LBRACE`, `RETURN`, `INTCONSTANT(0)`, `SEMICOLON`, `RBRACE`.
+
+### Design
+
+I'll break the lexer up into a couple of modules. `token.c` will contain the token data structure and functions to create and destroy tokens. `input.c` will contain the input data structure and functions to read from the input file. `tokenizer.c` will contain the main lexer logic.
+
+### Token
+Tokens are the smallest unit of meaning in a language. They're used by the parser to build an abstract syntax tree (AST). We'll need a couple of things to represent a token:
+* The type of token. This will be an enum, with values like `TOK_CTK_IF` or `TOK_CONST_INTEGER_U32`.
+* The value of the token. Some tokens, like keywords, don't have a value. Others, like identifiers or constants, do.
+* The line and column of the token. This is used for error messages.
+
+As I mentioned earlier, we're trying to implement a sort of class system in C. To that end, tokens will be opaque to all other modules. The only way to interact with them will be through functions in `token.c`.
+### Input
+
+#### Input Interface
diff --git a/projects/style.css b/projects/style.css
@@ -13,147 +13,150 @@ code,
font-family: 'Fira Mono', Menlo, Monaco, Consolas, Liberation Mono, Courier New, monospace;
}
-.block-header,
-.block-header+pre.prettyprint {
- background-color: #e0e0e0;
- border: 2px solid #fffdef;
- box-sizing: border-box;
- border-radius: 0.6rem;
+/* Quotes and Block Quotes */
+blockquote {
+ margin: 1.5em 10px;
+ padding: 0.5em 10px;
+ border-left: 5px solid #ccc;
+ color: #666;
+ background-color: #f9f9f9;
+ font-style: italic;
}
-.block-header {
- display: inline-block;
- position: relative;
- z-index: 6;
- border-top-left-radius: 0.6rem;
- border-top-right-radius: 0.6rem;
- border-bottom-width: 0;
- padding: 0.4rem 0.6rem;
- background-color: #333;
- color: #fff;
+blockquote p {
+ margin: 0;
+ font-size: 1.2em;
}
-.block-title {
- font-weight: normal;
+q {
+ quotes: "“" "”" "‘" "’";
+ font-style: italic;
}
-.block-title a {
- margin-top: -1rem;
- padding-top: 1rem;
+q::before {
+ content: open-quote;
}
-.block-title,
-.block-header a:link,
-.block-header a:visited {
- color: #fff;
+q::after {
+ content: close-quote;
}
-.block-header a:hover,
-.block-header a:active {
- color: #ffa500;
+/*! Color themes for Google Code Prettify | MIT License | github.com/jmblog/color-themes-for-google-code-prettify */
+.prettyprint {
+ background: #f5f7ff;
+ font-family: Menlo, "Bitstream Vera Sans Mono", "DejaVu Sans Mono", Monaco, Consolas, monospace;
+ border: 0 !important;
}
-.code-block pre.prettyprint {
- padding: 0.6rem;
- white-space: pre-wrap;
- border-radius: 0.6rem;
+.pln {
+ color: #202746;
}
-.code-block .block-header+pre.prettyprint {
- position: relative;
- z-index: 5;
- margin-top: -1px;
- border-top-left-radius: 0;
+/* Specify class=linenums on a pre to get line numbering */
+ol.linenums {
+ margin-top: 0;
+ margin-bottom: 0;
+ color: #202746;
}
-/* Usages Block */
-.block-usages {
- margin-top: -1rem;
+li.L0,
+li.L1,
+li.L2,
+li.L3,
+li.L4,
+li.L5,
+li.L6,
+li.L7,
+li.L8,
+li.L9 {
+ padding-left: 1em;
+ background-color: #f5f7ff;
+ list-style-type: decimal;
}
-.block-usages small {
- display: inline-block;
- margin: 0.4rem 0.6rem;
- font-size: 11pt;
- color: #363535;
-}
+@media screen {
-.block-usages a,
-.block-usages span {
- padding: 0 0.5rem;
- margin-left: 0.1rem;
-}
+ /* string content */
-.block-usages a {
- background-color: #f9f8f4;
- border: 1px solid #c7c6bf;
- box-sizing: border-box;
- color: #57554a;
- border-radius: 0.3rem;
-}
+ .str {
+ color: #ac9739;
+ }
-.block-usages a+*,
-.block-usages span+* {
- margin-left: 0.2rem;
-}
+ /* keyword */
-.block-usages a:hover,
-.block-usages a:active {
- text-decoration: none;
- background-color: #f9f9f7;
- color: #a6a28d;
-}
+ .kwd {
+ color: #6679cc;
+ }
-/* File Name */
-.file-name {
- display: block;
- position: relative;
- /* Right align */
- float: right;
- z-index: 5;
- color: #00ff22;
- text-align: right;
- margin: 0.2rem 0.6rem;
- padding: 0.2rem 0.6rem;
- background-color: #333;
- border-radius: 0.3rem;
- border-bottom-width: 0;
- top: 0.2rem;
-}
+ /* comment */
-/* File name in a header. Keep the monospace font and color, ditch the background and border */
-.file-name-hr {
- display: inline;
- text-align: right;
- border-radius: 0.3rem;
- border-bottom-width: 0;
- top: 0.2rem;
-}
+ .com {
+ color: #202746;
+ }
-/* Quotes and Block Quotes */
-blockquote {
- margin: 1.5em 10px;
- padding: 0.5em 10px;
- border-left: 5px solid #ccc;
- color: #666;
- background-color: #f9f9f9;
- font-style: italic;
-}
+ /* type name */
-blockquote p {
- margin: 0;
- font-size: 1.2em;
-}
+ .typ {
+ color: #3d8fd1;
+ }
-q {
- quotes: "“" "”" "‘" "’";
- font-style: italic;
-}
+ /* literal value */
-q::before {
- content: open-quote;
-}
+ .lit {
+ color: #c76b29;
+ }
-q::after {
- content: close-quote;
+ /* punctuation */
+
+ .pun {
+ color: #202746;
+ }
+
+ /* lisp open bracket */
+
+ .opn {
+ color: #202746;
+ }
+
+ /* lisp close bracket */
+
+ .clo {
+ color: #202746;
+ }
+
+ /* markup tag name */
+
+ .tag {
+ color: #c94922;
+ }
+
+ /* markup attribute name */
+
+ .atn {
+ color: #c76b29;
+ }
+
+ /* markup attribute value */
+
+ .atv {
+ color: #22a2c9;
+ }
+
+ /* declaration */
+
+ .dec {
+ color: #c76b29;
+ }
+
+ /* variable name */
+
+ .var {
+ color: #c94922;
+ }
+
+ /* function name */
+
+ .fun {
+ color: #3d8fd1;
+ }
}
\ No newline at end of file