#
# Bounded stack language for HTML
#
# File layout, as visible in this file: a "BSL type" header line, a
# "typefaces" section, a "state set" section, a "demo string" line, a
# "word range" line and finally the "rules" section.
#
# Each entry of the typefaces section is a five-character attribute field
# followed by a typeface name.  Those names are what the state set assigns
# to states and what the rules select with \{Name}.
# NOTE(review): the meaning of the individual attribute characters
# (r/i/l/t, b, u, B/R/G/l, and the unused fifth column) is not defined in
# this file; presumably they encode font family, bold, underline and
# colour -- confirm against the highlighter's documentation before editing.
#
BSL type recursive trans-line
typefaces
#
# Typefaces for running text
#
r---- HTML-Text-Roman
i---- HTML-Text-Italic
rb--- HTML-Text-Bold
lb--- HTML-Text-Heading
t---- HTML-Text-Typewriter
r-uB- HTML-Text-HREF
#
# Typefaces for markup: comments, elements, attributes and entities
#
t--B- HTML-Comment
t--R- HTML-Element
t---- HTML-Element-Attr
t--B- HTML-Entity
#
# Typefaces for strings and escape sequences inside strings
#
t--G- HTML-String
t--R- HTML-String-Escape
#
# Typefaces used by the script rules (states j, s, h, c, d)
#
t---- HTML-Script-Plain
tb--- HTML-Script-Keyword
tb-l- HTML-Script-Variable
t--G- HTML-Script-Number
#
# Each state below is a single letter followed by a typeface name
# (presumably the typeface used for text while that state is current --
# confirm).  Rules refer to states by these letters, both in their leading
# state list (e.g. RIBHTA) and in "push X" actions.
#
# Upper-case letters are used by the regular HTML rules, lower-case letters
# by the script rules.
#
# NOTE(review): no rule visible in this file pushes C, S or j, and D is
# pushed (by the <a href rule) but never appears in any rule's state list,
# so nothing can match or pop while in D.  The rules that enter or leave
# these states are probably missing from this copy -- verify against a
# pristine version of the file.
#
state set
R HTML-Text-Roman
I HTML-Text-Italic
B HTML-Text-Bold
H HTML-Text-Heading
T HTML-Text-Typewriter
A HTML-Text-HREF
#
# D is pushed after "<a ... href"; C and S have no visible entry rule
#
D HTML-Element-Attr
C HTML-Comment
S HTML-String
#
# Script states:
#   j  script code
#   s  string opened with "  (closed by ")
#   h  string opened with '  (closed by ')
#   c  comment opened with /*  (closed by */)
#   d  comment opened with //  (closed by ?> or by the $ rule)
#
j HTML-Script-Plain
s HTML-String
h HTML-String
c HTML-Comment
d HTML-Comment
#
# "demo string" supplies a sample text and "word range" a character set
# (letters, digits and underscore).
# NOTE(review): how the highlighter uses these two settings is not visible
# in this file -- presumably a preview sample and the definition of word
# characters; confirm.
#
demo string Click here
word range a-zA-Z0-9_
rules
#
# Rule format as used throughout this section:
#   <state letters> <pattern> <action>
# The state letters list the states in which the rule applies.  Inside a
# pattern, \{Name} switches to the named typeface for the text that
# follows, and ':' appears to separate alternative patterns.  The actions
# seen in this file are "nop", "pop" and one or more "push X".
#
########################################################################
#
# PART 1 - Regular HTML
#
# Scan sequences of non-special characters first (for speed).
# The first rule consumes any run of characters other than < > " and &;
# the second shows entities such as &amp; or &#160; in the HTML-Entity
# typeface.
#
RIBHTA [^<>"&]+ nop
RIBHTA \{HTML-Entity}\&[#0-9a-zA-Z]+; nop
#
# Commands
#
# Opening elements push a text state (I, B, H, T or A); the rules ending
# in "pop" leave the current state again.
#
# NOTE(review): most alternatives in the rules below are empty (only the
# ':' separators remain) and two rules are broken across several lines.
# This looks as if the tag patterns were stripped from this copy of the
# file; the heading rule and the <a href rule are the only ones whose
# patterns survive.  Restore the missing patterns from a pristine copy
# rather than guessing them.
#
RIBHTA \{HTML-Element}::::: push I
RIBHTA \{HTML-Element}: push B
RIBHTA \{HTML-Element}<[hH][0-9]>:: push H
RIBHTA \{HTML-Element}::
:
:: push T
#
# "<a" followed by optional blanks and "href" (any letter case): push the
# link text state A, then the attribute state D on top of it.
#
RIBHTA \{HTML-Element}<[aA]\{HTML-Element-Attr}[ ]*[hH][rR][eE][fF]\> push A, push D
RIBHTA \{HTML-Element}
::::::::: pop
RIBHTA \{HTML-Element}::::::::: pop
RIBHTAC \{HTML-Element}::: pop
#
# Standard C control keywords
#
# Rules from here on apply to script code (state j) and to the string and
# comment states pushed from it (s, h, c, d).
#
# NOTE(review): in all keyword rules below every alternative is empty
# apart from backslashes; the keyword patterns themselves appear to have
# been lost from this copy of the file.  Restore them from a pristine copy
# before relying on keyword highlighting.
#
j \{HTML-Script-Keyword}\:\:\:\:\:\:\:\:\:\:\:\ nop
#
# Object oriented and ecma/php specific keywords
#
j \{HTML-Script-Keyword}\:\:\:\:\:\ nop
j \{HTML-Script-Keyword}\:\:\ nop
j \{HTML-Script-Keyword}\:\:\ nop
#
# Strings, numbers and comments
#
# A double quote enters string state s, a single quote enters state h.
#
j " push s
j ' push h
#
# Numeric literals: hexadecimal, exponent forms, decimals and plain
# integers, each also accepted with a leading '-'.
# NOTE(review): the '.' in the decimal alternatives is not escaped; whether
# it matches only a literal dot depends on the pattern dialect -- confirm.
#
j \{HTML-Script-Number}0[xX][0-9a-fA-F]+:-0[xX][0-9a-fA-F]+:[\-0-9]*.[0-9]+[eE][0-9]+:[\-0-9]+.[0-9]*[eE][0-9]+:[\-0-9]+.[0-9]*:[\-0-9]*.[0-9]+:[0-9]+:-[0-9]+ nop
#
# Variables in the forms $name, ${name} and {$expr}.  These rules apply in
# script code (j) and in strings opened with " (s), but not in strings
# opened with ' (h).
#
js \{HTML-Script-Plain}\$\{HTML-Script-Variable}[a-zA-Z0-9_]+ nop
js \{HTML-Script-Plain}\${\{HTML-Script-Variable}[a-zA-Z0-9_]+\{HTML-Script-Plain}} nop
js \{HTML-Script-Plain}{\$\{HTML-Script-Variable}[\-a-zA-Z0-9_$>'\[\]]+\{HTML-Script-Plain}} nop
#
# "/*" enters comment state c, "//" enters comment state d.
#
j \{HTML-Comment}/\* push c
j \{HTML-Comment}// push d
#
# Inside comments
#
# The first rule consumes comment text but stops at '/', '*' and '?' so
# that the terminators below get a chance to match.  State c ends at "*/";
# state d ends at "?>" or via the '$' rule (presumably end of line --
# confirm the anchor semantics of this pattern dialect).
#
cd [^/*?]+ nop
c \*/ pop
d \?> pop
d $ pop
#
# Inside strings
#
# The first rule consumes string text up to the next quote, backslash,
# '$' or '{'.  Each string state is left only by the quote character that
# opened it.
# NOTE(review): the other quote character (e.g. ' inside state s) is
# excluded from the first rule and has no rule of its own; how the scanner
# treats characters that match no rule is not visible here -- confirm.
#
sh [^"'\\${]+ nop
s \{HTML-Script-Plain}" pop
h \{HTML-Script-Plain}' pop
#
# Known escape sequences (three digits, \x with two hex digits, and the
# single-letter escapes) are shown in the HTML-String-Escape typeface.
# The last rule handles any other backslash sequence.
# NOTE(review): the meaning of the trailing '?' in that rule depends on the
# pattern dialect (single character versus optional) -- confirm.
#
sh \{HTML-String-Escape}\\[0-9][0-9][0-9]:\\x[0-9A-Fa-f][0-9A-Fa-f]:\\a:\\b:\\e:\\f:\\n:\\r:\\t:\\v:\\0 nop
sh \{HTML-String-Escape}\\\{HTML-String}? nop
#
# Disabled rule; if enabled it would pop both string states on the '$'
# pattern (presumably at end of line).
#
#sh $ pop