util_close_html_tags

one of the documented procedures in this installation of the ACS
Usage:
util_close_html_tags   html_fragment   { break_soft "0" }   { break_hard "0" }
What it does:
Given an HTML fragment, this procedure will close any tags that have been left open. The optional arguments let you specify that the fragment is to be truncated to a certain number of displayable characters. After break_soft, it truncates and closes open tags unless you're within non-breaking tags (e.g., Af). After break_hard displayable characters, the procedure simply truncates and closes any open HTML tags that might have resulted from the truncation.

Note that the internal syntax table dictates which tags are non-breaking. The syntax table has codes:

Defined in: /web/philip/packages/acs-core/utilities-procs.tcl

Source code:


    set frag $html_fragment 

    set syn(A) nobr
    set syn(ADDRESS) nobr
    set syn(NOBR) nobr
    #
    set syn(FORM) discard
    set syn(TABLE) discard
    #
    set syn(BLINK) remove 
    #
    set syn(FONT) close
    set syn(B) close
    set syn(BIG) close
    set syn(I) close
    set syn(S) close
    set syn(SMALL) close
    set syn(STRIKE) close
    set syn(SUB) close
    set syn(SUP) close
    set syn(TT) close
    set syn(U) close
    set syn(ABBR) close
    set syn(ACRONYM) close
    set syn(CITE) close
    set syn(CODE) close
    set syn(DEL) close
    set syn(DFN) close
    set syn(EM) close
    set syn(INS) close
    set syn(KBD) close
    set syn(SAMP) close
    set syn(STRONG) close
    set syn(VAR) close
    set syn(DIR) close
    set syn(DL) close
    set syn(MENU) close
    set syn(OL) close
    set syn(UL) close
    set syn(H1) close
    set syn(H2) close
    set syn(H3) close
    set syn(H4) close
    set syn(H5) close
    set syn(H6) close
    set syn(BDO) close
    set syn(BLOCKQUOTE) close
    set syn(CENTER) close
    set syn(DIV) close
    set syn(PRE) close
    set syn(Q) close
    set syn(SPAN) close

    set out {} 
    set out_len 0

    # counts how deep we are nested in nonbreaking tags, tracks the nobr point
    # and what the nobr string length would be
    set nobr 0
    set nobr_out_point 0
    set nobr_tagptr 0
    set nobr_len 0

    set discard 0

    set tagptr -1

    # first thing we do is chop off any trailing unclosed tag 
    # since when we substr blobs this sometimes happens
    
    # this should in theory cut any tags which have been cut open.
    while {[regexp {(.*)<[^>]*$} $frag match frag]} {}

    while { "$frag" != "" } {
        # here we attempt to cut the string into "pretag<TAG TAGBODY>posttag"
        # and build the output list.

        if {![regexp "(\[^<]*)(<\[ \t]*(/?)(\[^ \t>]+)(\[^>]*)>)?(.*)" $frag match pretag fulltag close tag tagbody frag]} {
            # should never get here since above will match anything.
            # puts "NO MATCH: should never happen! frag=$frag"
            append out $frag 
            set frag {}
        } else {
            # puts "\n\nmatch=$match\n pretag=$pretag\n fulltag=$fulltag\n close=$close\n tag=$tag\n tagbody=$tagbody\nfrag=$frag\n\n"
            if { ! $discard } {
                # figure out if we can break with the pretag chunk 
                if { $break_soft } {
                    if {! $nobr && [expr [string length $pretag] + $out_len] > $break_soft } {
                        # first chop pretag to the right length
                        set pretag [string range $pretag 0 [expr $break_soft - $out_len]]
                        # clip the last word
                        regsub "\[^ \t\n\r]*$" $pretag {} pretag
                        append out [string range $pretag 0 $break_soft]
                        break
                    } elseif { $nobr &&  [expr [string length $pretag] + $out_len] > $break_hard } {
                        # we are in a nonbreaking tag and are past the hard break
                        # so chop back to the point we got the nobr tag...
                        set tagptr $nobr_tagptr 
                        if { $nobr_out_point > 0 } { 
                            set out [string range $out 0 [expr $nobr_out_point - 1]]
                        } else { 
                            # here maybe we should decide if we should keep the tag anyway 
                            # if zero length result would be the result...
                            set out {}
                        }
                        break
                    } 
                }
                
                # tack on pretag
                append out $pretag
                incr out_len [string length $pretag]
            }
            
            # now deal with the tag if we got one...
            if  { $tag == "" } { 
                # if the tag is empty we might have one of the bad matched that are not eating 
                # any of the string so check for them 
                if {[string length $match] == [string length $frag]} { 
                    append out $frag
                    set frag {}
                }
            } else {
                set tag [string toupper $tag]            
                if { ![info exists syn($tag)]} {
                    # if we don't have an entry in our syntax table just tack it on 
                    # and hope for the best.
                    if { ! $discard } {
                        append  out $fulltag
                    }
                } else {
                    if { $close != "/" } {
                        # new tag 
                        # "remove" tags are just ignored here
                        # discard tags 
                        if { $discard } { 
                            if { $syn($tag) == "discard" } {
                                incr discard 
                                incr tagptr 
                                set tagstack($tagptr) $tag
                            }
                        } else {
                            switch $syn($tag) {
                                nobr { 
                                    if { ! $nobr } {
                                        set nobr_out_point [string length $out]
                                        set nobr_tagptr $tagptr
                                        set nobr_len $out_len
                                    }
                                    incr nobr
                                    incr tagptr 
                                    set tagstack($tagptr) $tag
                                    append out $fulltag
                                }
                                discard { 
                                    incr discard 
                                    incr tagptr 
                                    set tagstack($tagptr) $tag
                                }
                                close {                                 
                                    incr tagptr 
                                    set tagstack($tagptr) $tag
                                    append out $fulltag
                                }
                            }
                        }
                    } else { 
                        # we got a close tag
                        if { $discard } { 
                            # if we are in discard mode only watch for 
                            # closes to discarded tags
                            if { $syn($tag) == "discard"} {
                                if {$tagptr > -1} {
                                    if { $tag != $tagstack($tagptr) } {
                                        #puts "/$tag without $tag"
                                    } else {
                                        incr tagptr -1
                                        incr discard -1
                                    }
                                }
                            }
                        } else {
                            if { $syn($tag) != "remove"} {
                                # if tag is a remove tag we just ignore it...
                                if {$tagptr > -1} {
                                    if {$tag != $tagstack($tagptr) } {
                                        # puts "/$tag without $tag"
                                    } else {
                                        incr tagptr -1
                                        if { $syn($tag) == "nobr"} {
                                            incr nobr -1
                                        } 
                                        append out $fulltag
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    
    # on exit of the look either we parsed it all or we truncated. 
    # we should now walk the stack and close any open tags.

    for {set i $tagptr} { $i > -1 } {incr i -1} { 
        # append out "<!-- autoclose --> </$tagstack($i)>"
        append out "</$tagstack($i)>"
    }
    
    return $out


philg@mit.edu