#
# Bounded stack language for HTML
#
# Highlighting definition for HTML text with embedded script / PHP code.
# The file is organised as a short header followed by four sections:
#   typefaces  - the named typefaces that the rules can switch to
#   state set  - one-letter state names, each tied to a typeface
#   demo string / word range - auxiliary settings
#   rules      - <states> <pattern> <action> triples
#
# NOTE(review): this copy had lost its line breaks, so the first '#'
# turned everything after it into a comment.  The line structure below is
# reconstructed from the keyword boundaries; table and rule fields are
# separated by a single TAB because several patterns contain literal
# spaces (e.g. "[ ]*").  Confirm both against the upstream file.
#
# NOTE(review): several patterns contain runs of empty alternatives
# ("::::" or "\:\:\") and the demo string has no markup.  This looks like
# "<...>" sequences were stripped from this copy.  The affected lines are
# marked individually and left exactly as found; restore them from a
# pristine copy rather than from the guesses given in the notes.
#
BSL
type recursive
trans-line
#
# Typeface table: <attribute code> <typeface name>.
# Names are referenced from the state set and, inline, from patterns
# through the \{Name} notation.
#
typefaces
r----	HTML-Text-Roman
i----	HTML-Text-Italic
rb---	HTML-Text-Bold
lb---	HTML-Text-Heading
t----	HTML-Text-Typewriter
r-uB-	HTML-Text-HREF
#
t--B-	HTML-Comment
t--R-	HTML-Element
t----	HTML-Element-Attr
t--B-	HTML-Entity
#
t--G-	HTML-String
t--R-	HTML-String-Escape
#
t----	HTML-Script-Plain
tb---	HTML-Script-Keyword
tb-l-	HTML-Script-Variable
t--G-	HTML-Script-Number
#
# State table: <state letter> <typeface name>.
# Upper-case letters are used by the HTML rules (PART 1), lower-case
# letters by the script rules (PART 2).
#
state set
R	HTML-Text-Roman
I	HTML-Text-Italic
B	HTML-Text-Bold
H	HTML-Text-Heading
T	HTML-Text-Typewriter
A	HTML-Text-HREF
#
D	HTML-Element-Attr
C	HTML-Comment
S	HTML-String
#
j	HTML-Script-Plain
s	HTML-String
h	HTML-String
c	HTML-Comment
d	HTML-Comment
#
# NOTE(review): the demo string presumably contained an anchor element
# around "Click here"; only the text survives in this copy.
#
demo string Click here
word range a-zA-Z0-9_
#
# Rule format: <states the rule applies in> <pattern> <action>
# Actions used in this file: nop, push X, pop, goto X (comma separated
# when a rule performs more than one).  ':' inside a pattern appears to
# separate alternatives.
#
rules
#
########################################################################
#
# PART 1 - Regular HTML
#
# Scan sequences of non-special characters first (for speed).
#
RIBHTA	[^<>"&]+	nop
RIBHTA	\{HTML-Entity}\&[#0-9a-zA-Z]+;	nop
#
# Commands
#
# NOTE(review): the next two patterns consist only of empty alternatives.
# Judging by the closing-tag rule further down they presumably listed the
# italic (<i>, <it>, <em>) and bold (<b>) opening tags - confirm.
#
RIBHTA	\{HTML-Element}:::::	push I
RIBHTA	\{HTML-Element}:	push B
#
# NOTE(review): empty alternative between the heading tag and <title>;
# presumably an upper-case spelling of <title> was lost - confirm.
#
RIBHTA	\{HTML-Element}<[hH][0-9]>::<title>	push H
RIBHTA	\{HTML-Element}<TT>:<tt>:<PRE>:<pre>:<CODE>:<code>	push T
#
# Anchor with HREF: enters the link state and then the attribute state.
#
RIBHTA	\{HTML-Element}<[aA]\{HTML-Element-Attr}[ ]*[hH][rR][eE][fF]\>	push A, push D
#
# Closing tags leave the state entered by the matching opening tag.
#
# NOTE(review): both closing-tag patterns end in / consist of empty
# alternatives; the remaining closing tags (upper-case spellings, </tt>,
# </pre>, </code>, </a>, </title> ...) presumably stood here - confirm.
#
RIBHTA	\{HTML-Element}</i>:</it>:</em>:</b>:</h[0-9]>:::::	pop
RIBHTA	\{HTML-Element}:::::::::	pop
#
# NOTE(review): pattern is empty apart from separators.  The action
# (enter script state, then attribute state) suggests it matched the
# opening of script-like elements - confirm.
#
RIBHTAC	\{HTML-Element}:::	push j, push D
#
# "<?" switches to script state from every HTML, string and script state
# listed.
#
RIBHTACDjsh	\{HTML-Element}<\?	push j
#
# Any other tag start: switch to attribute state until the closing '>'.
#
RIBHTA	\{HTML-Element}<[ \t]*[/!?]?[a-zA-Z0-9]*	push D
#
# Command trailers
#
D	\{HTML-Element}--	goto C
D	\{HTML-Element}>	pop
D	[a-zA-Z]+	nop
D	"\{HTML-String}	push S
#
# Strings inside elements
#
S	[^"<]+	nop
S	\{HTML-Element}<\?	push j
S	\{HTML-Element-Attr}"	pop
#
# Comments
#
# "--" toggles between attribute state (D) and comment state (C); see the
# matching "goto C" rule under Command trailers.
#
C	\{HTML-Element}--	goto D
#
########################################################################
#
# PART 2 - Scripts and PHP (try to cover this in one system)
#
# NOTE(review): only the "?>" terminator survives; the four empty
# alternatives presumably held the closing tags of the script-like
# elements - confirm.
#
j	\{HTML-Element}\?>::::	pop
#
# Standard C control keywords
#
# NOTE(review): the keyword list is missing - only the separators and a
# backslash per entry remain (12 entries here).  Presumably each entry
# was a keyword wrapped in word-boundary markers - restore from upstream.
#
j	\{HTML-Script-Keyword}\:\:\:\:\:\:\:\:\:\:\:\	nop
#
# Object oriented and ecma/php specific keywords
#
# NOTE(review): same damage as the C keyword rule (6, 3 and 3 entries).
#
j	\{HTML-Script-Keyword}\:\:\:\:\:\	nop
j	\{HTML-Script-Keyword}\:\:\	nop
j	\{HTML-Script-Keyword}\:\:\	nop
#
# Strings, numbers and comments
#
# s = double-quoted string, h = single-quoted string.
#
j	"	push s
j	'	push h
#
# Numbers: hexadecimal, floating point with exponent, floating point,
# plain integer - each with an optional leading minus.
# NOTE(review): the '.' in the floating point alternatives is not
# escaped; if '.' is a wildcard in this pattern dialect it matches any
# character, not just a decimal point - confirm.
#
j	\{HTML-Script-Number}0[xX][0-9a-fA-F]+:-0[xX][0-9a-fA-F]+:[\-0-9]*.[0-9]+[eE][0-9]+:[\-0-9]+.[0-9]*[eE][0-9]+:[\-0-9]+.[0-9]*:[\-0-9]*.[0-9]+:[0-9]+:-[0-9]+	nop
#
# Variables, in script text and inside double-quoted strings:
# $name, ${name} and {$expr}.
#
js	\{HTML-Script-Plain}\$\{HTML-Script-Variable}[a-zA-Z0-9_]+	nop
js	\{HTML-Script-Plain}\${\{HTML-Script-Variable}[a-zA-Z0-9_]+\{HTML-Script-Plain}}	nop
js	\{HTML-Script-Plain}{\$\{HTML-Script-Variable}[\-a-zA-Z0-9_$>'\[\]]+\{HTML-Script-Plain}}	nop
#
# c = block comment, d = line comment.
#
j	\{HTML-Comment}/\*	push c
j	\{HTML-Comment}//	push d
#
# Inside comments
#
# A line comment ends at "?>" or at the end of the line.
#
cd	[^/*?]+	nop
c	\*/	pop
d	\?>	pop
d	$	pop
#
# Inside strings
#
sh	[^"'\\${]+	nop
s	\{HTML-Script-Plain}"	pop
h	\{HTML-Script-Plain}'	pop
#
# Recognised escape sequences first, then any other backslash pair.
#
sh	\{HTML-String-Escape}\\[0-9][0-9][0-9]:\\x[0-9A-Fa-f][0-9A-Fa-f]:\\a:\\b:\\e:\\f:\\n:\\r:\\t:\\v:\\0	nop
sh	\{HTML-String-Escape}\\\{HTML-String}?	nop
#
# Disabled rule: would end a string at the end of the line.
#
#sh	$	pop