001package Torello.HTML;
002
003import java.util.*;
004import java.util.regex.*;
005import java.util.stream.*;
006
007import java.util.function.Predicate;
008
009import Torello.HTML.NodeSearch.*;
010import Torello.Java.*;
011
012import Torello.Java.Shell.C;
013
014/**
015 * <CODE>Util - Documentation.</CODE><BR /><BR />
016 * <EMBED CLASS="external-html" DATA-FILE-ID="UTIL">
017 */
018@Torello.HTML.Tools.JavaDoc.StaticFunctional
019public class Util
020{
021    private Util() { }
022
023
024    // ***************************************************************************************
025    // ***************************************************************************************
026    // Trim TextNode Strings
027    // ***************************************************************************************
028    // ***************************************************************************************
029
030
031    /** Convenience Method.  Invokes {@link #trimTextNodes(Vector, int, int, boolean)} */
032    public static int trimTextNodes(Vector<HTMLNode> page, boolean deleteZeroLengthStrings)
033    { return trimTextNodes(page, 0, -1, deleteZeroLengthStrings); }
034
035    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #trimTextNodes(Vector, int, int, boolean)} */
036    public static int trimTextNodes(Vector<HTMLNode> page, DotPair dp, boolean deleteZeroLengthStrings)
037    { return trimTextNodes(page, dp.start, dp.end + 1, deleteZeroLengthStrings); }
038
039    /**
040     * This will iterate through the entire {@code Vector<HTMLNode>}, and invoke
041     * {@code java.lang.String.trim()} on each {@code TextNode} on the page.  If this invocation
042     * results in a reduction of {@code String.length()}, then a new {@code TextNode} will be
043     * instantiated whose {@code TextNode.str} field is set to the result of the
044     * {@code String.trim(old_node.str)} operation.
045     * 
046     * @param deleteZeroLengthStrings If a {@code TextNode's} length is zero (before or after
047     * {@code trim()} is called) and when this parameter is <B>TRUE</B>, that {@code TextNode} must
048     * be removed from the {@code Vector}.
049     * 
050     * @return Any node that is trimmed or deleted will increment the counter.  This counter
051     * final-value is returned
052     */
053    public static int trimTextNodes
054        (Vector<HTMLNode> page, int sPos, int ePos, boolean deleteZeroLengthStrings)
055    {
056        int                 counter = 0;
057        IntStream.Builder   b       = deleteZeroLengthStrings ? IntStream.builder() : null;
058        HTMLNode            n       = null;
059        LV                  l       = new LV(page, sPos, ePos);
060
061        for (int i=l.start; i < l.end; i++)
062            if ((n = page.elementAt(i)).isTextNode())
063            {
064                String  trimmed         = n.str.trim();
065                int     trimmedLength   = trimmed.length();
066
067                if ((trimmedLength == 0) && deleteZeroLengthStrings)
068                    { b.add(i); counter++; }
069                else if (trimmedLength < n.str.length())
070                    { page.setElementAt(new TextNode(trimmed), i); counter++; }
071            }
072
073        if (deleteZeroLengthStrings) removeNodesOPT(page, b.build().toArray());
074
075        return counter;
076    }
077
078
079    // ***************************************************************************************
080    // ***************************************************************************************
081    // Inclusive-Empty Removal Operations
082    // ***************************************************************************************
083    // ***************************************************************************************
084
085
086    /** Convenience Method.  Invokes {@link #removeInclusiveEmpty(Vector, int, int, String[])} */
087    public static int removeInclusiveEmpty(Vector<HTMLNode> page, String... htmlTags)
088    { return removeInclusiveEmpty(page, 0, -1, htmlTags); }
089
090    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #removeInclusiveEmpty(Vector, int, int, String[])} */
091    public static int removeInclusiveEmpty(Vector<HTMLNode> page, DotPair dp, String... htmlTags)
092    { return removeInclusiveEmpty(page, dp.start, dp.end + 1, htmlTags); }
093
094    /**
095     * This will do an "Inclusive Search" using the standard {@code class TagNodeInclusiveIterator}
096     * in the {@code package NodeSearch}.  Then it will inspect the contents of the subsections.
097     * Any subsections that do not contain any instances of {@code HTMLNode} in between them, or
098     * any subsections that only contain "blank-text" (white-space) between them shall be removed.
099     * 
     * <BR /><BR /><B><SPAN STYLE="color: red;">IMPORTANT:</SPAN></B> The search logic shall
     * perform multiple <I><B>recursive iterations</B></I> of itself, such that if, for instance,
     * the user requested that all empty HTML divider ({@code <DIV>}) elements be removed, and
     * removing a set of dividers resulted in more empty ones (nested {@code <DIV>} elements),
     * then an additional removal shall be called.  <I>This recursion shall continue until there
     * are no empty HTML elements of the types listed by</I> {@code 'htmlTags'}
106     *
107     * @param page Any vectorized-html page or sub-page.
108     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
109     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
110     * 
111     * @param htmlTags The list of <I>inclusive</I> (non-singleton) html elements to search for
112     * possibly being empty container tags.
113     * 
114     * @return The number of {@code HTMLNode's} that were removed.
115     * 
116     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
117     */
118    public static int removeInclusiveEmpty
119        (Vector<HTMLNode> page, int sPos, int ePos, String... htmlTags)
120    {
121        DotPair         subList;
122        int             removed = 0;
123        HNLIInclusive   iter    = TagNodeInclusiveIterator.iter(page, htmlTags);
124        LV              l       = new LV(page, sPos, ePos);
125
126        iter.restrictCursor(l);
127
128        TOP:
129        while (iter.hasNext())
130
131            // If there is only the opening & closing pair, with nothing in between,
132            // then the pair must be removed because it is "Empty" (Inclusive Empty)
133            if ((subList = iter.nextDotPair()).size() == 2)
134            { iter.remove();    ePos -= subList.size();     removed += subList.size(); }
135
136            else
137            {
138                // If there is any TagNode in between the start-end pair, then this is NOT EMPTY
139                // In this case, skip to the next start-end opening-closing pair.
140                for (int i=(subList.start + 1); i < subList.end; i++)
141                    if (! page.elementAt(i).isTextNode())
142                        continue TOP;
143
144                // If there were only TextNode's between an opening-closing TagNode Pair....
145                // **AND** those TextNode's are only white-space, then this also considered
146                // Inclusively Empty.  (Get all TextNode's, and if .trim() reduces the length()
147                // to zero, then it was only white-space.
148                if (Util.textNodesString(page, subList).trim().length() == 0)
149                { iter.remove();    ePos -= subList.size();     removed += subList.size(); }
150            }
151
152        // This process must be continued recursively, because if any inner, for instance,
153        // <DIV> ... </DIV> was removed, then the outer list must be re-checked...
154        if (removed > 0)
155            return removed + removeInclusiveEmpty(page, sPos, ePos, htmlTags);
156        else
157            return 0;
158    }
159
160
161    // ***************************************************************************************
162    // ***************************************************************************************
163    // Vectorized-HTML To-String Methods
164    // ***************************************************************************************
165    // ***************************************************************************************
166
167
    /** Convenience Method.  Invokes {@link #rangeToString(Vector, int, int)}. */
169    public static String pageToString(Vector<? extends HTMLNode> html)
170    { return rangeToString(html, 0, -1); }
171
    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #rangeToString(Vector, int, int)}. */
173    public static String rangeToString(Vector<? extends HTMLNode> html, DotPair dp)
174    { return rangeToString(html, dp.start, dp.end + 1); }
175
176    /**
177     * The purpose of this method/function is to convert a portion of the contents of an HTML-Page,
178     * currently being represented as a {@code Vector} of {@code HTMLNode's} into a {@code String.}
179     * Two {@code 'int'} parameters are provided in this method's signature to define a sub-list
180     * of a page to be converted to a {@code java.lang.String}
181     * 
182     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
183     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
184     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
185     * 
186     * @return The {@code Vector} converted into a {@code String}.
187     * 
188     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
189     * 
190     * @see #pageToString(Vector)
191     * @see #rangeToString(Vector, DotPair)
192     */
193    public static String rangeToString(Vector<? extends HTMLNode> html, int sPos, int ePos)
194    {
195        StringBuilder   ret = new StringBuilder();
196        LV              l   = new LV(html, sPos, ePos);
197
198        for (int i=l.start; i < l.end; i++) ret.append(html.elementAt(i).str);
199
200        return ret.toString();
201    }
202
203
204    // ***************************************************************************************
205    // ***************************************************************************************
206    // Vectorized-HTML TextNode To-String Methods
207    // ***************************************************************************************
208    // ***************************************************************************************
209
210
211    /** Convenience Method.  Invokes {@link #textNodesString(Vector, int, int)} */
212    public static String textNodesString(Vector<? extends HTMLNode> html)
213    { return textNodesString(html, 0, -1); }
214
215    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #textNodesString(Vector, int, int)} */
216    public static String textNodesString(Vector<? extends HTMLNode> html, DotPair dp)
217    { return textNodesString(html, dp.start, dp.end + 1); }
218
219    /**
220     * This will return a {@code String} that is comprised of ONLY the {@code TextNode's} contained
221     * within the input {@code Vector} - <I>and furthermore, only nodes that are situated between
222     * index {@code int 'sPos'} and index {@code int 'ePos'} in that {@code Vector.}</I>
223     * 
     * <BR /><BR />The {@code for-loop} that iterates the input-{@code Vector} parameter will
     * simply skip any instance of {@code 'TagNode'} and {@code 'CommentNode'} when building the
     * output return {@code String}.
227     * 
228     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
229     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
230     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
231     * 
232     * @return This will return a {@code String} that is comprised of the text-only elements in the
233     * web-page or sub-page.  Only text between the requested {@code Vector}-indices is included.
234     * 
235     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
236     * 
237     * @see #textNodesString(Vector, DotPair)
238     * @see #textNodesString(Vector)
239     */
240    public static String textNodesString(Vector<? extends HTMLNode> html, int sPos, int ePos)
241    {
242        StringBuilder   sb  = new StringBuilder();
243        LV              l   = new LV(html, sPos, ePos);
244        HTMLNode        n;
245
246        for (int i=l.start; i < l.end; i++)
247            if ((n = html.elementAt(i)).isTextNode())
248                sb.append(n.str);
249
250        return sb.toString();
251    }
252
253
254    // ***************************************************************************************
255    // ***************************************************************************************
256    // TextNode Removal Operations
257    // ***************************************************************************************
258    // ***************************************************************************************
259
260
261    /** Convenience Method.  Invokes {@link #removeAllTextNodes(Vector, int, int)} */
262    public static int removeAllTextNodes(Vector<HTMLNode> page)
263    { return removeAllTextNodes(page, 0, -1); }
264
265    /** Convenience Method.  Receives {@code DotPair} and invokes {@link #removeAllTextNodes(Vector, int, int)} */
266    public static int removeAllTextNodes(Vector<HTMLNode> page, DotPair dp)
267    { return removeAllTextNodes(page, dp.start, dp.end + 1); }
268
269    /**
270     * Takes a sub-section of an HTML {@code Vector} and removes all {@code TextNode} present
271     * 
272     * @param page Any HTML page
273     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
274     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
275     * 
276     * @return The number of HTML {@code TextNode's} that were removed
277     * 
278     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
279     * 
280     * @see TextNode
281     * @see #removeNodesOPT(Vector, int[])
282     */
283    public static int removeAllTextNodes(Vector<HTMLNode> page, int sPos, int ePos)
284    {
285        IntStream.Builder   b       = IntStream.builder();
286        LV                  l       = new LV(page, sPos, ePos);
287
288        // Use Java-Streams to build the list of nodes that are valid text-nodes.
289        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) b.add(i);
290
291        // Build the stream and convert it to an int[] (integer-array)
292        int[]               posArr  = b.build().toArray();
293
294        // The integer array is guaranteed to be sorted, and contain valid vector-indices.
295        removeNodesOPT(page, posArr);
296
297        return posArr.length;
298    }
299
300
301    // ***************************************************************************************
302    // ***************************************************************************************
303    // TagNode Removal Operations
304    // ***************************************************************************************
305    // ***************************************************************************************
306
307
308    /** Convenience Method.  Invokes {@link #removeAllTagNodes(Vector, int, int)} */
309    public static int removeAllTagNodes(Vector<HTMLNode> page) 
310    { return removeAllTagNodes(page, 0, -1); }
311
312    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #removeAllTagNodes(Vector, int, int)} */
313    public static int removeAllTagNodes(Vector<HTMLNode> page, DotPair dp)
314    { return removeAllTagNodes(page, dp.start, dp.end + 1); }
315
316    /**
317     * Takes a sub-section of an HTML {@code Vector} and removes all {@code TagNode} present
318     * 
319     * @param page Any HTML page
320     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
321     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
322     * 
323     * @return The number of HTML {@code TagNode's} that were removed
324     * 
325     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
326     * 
327     * @see TagNode
328     * @see #removeNodesOPT(Vector, int[])
329     */
330    public static int removeAllTagNodes(Vector<HTMLNode> page, int sPos, int ePos)
331    {
332        IntStream.Builder   b       = IntStream.builder();
333        LV                  l       = new LV(page, sPos, ePos);
334
335        // Use Java-Streams to build the list of nodes that are valid tag-nodes.
336        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) b.add(i);
337
338        // Build the stream and convert it to an int[] (integer-array)
339        int[]               posArr  = b.build().toArray();
340
341        // The integer array is guaranteed to be sorted, and contain valid vector-indices.
342        removeNodesOPT(page, posArr);
343
344        return posArr.length;
345    }
346
347
348    // ***************************************************************************************
349    // ***************************************************************************************
350    // CommentNode Removal Operations
351    // ***************************************************************************************
352    // ***************************************************************************************
353
354
355    /** Convenience Method.  Invokes {@link #removeAllCommentNodes(Vector, int, int)} */
356    public static int removeAllCommentNodes(Vector<HTMLNode> page)
357    { return removeAllCommentNodes(page, 0, -1); }
358
359    /** Convenience Method. Receives {@code DotPair} and Invokes {@link #removeAllCommentNodes(Vector, int, int)} */
360    public static int removeAllCommentNodes(Vector<HTMLNode> page, DotPair dp)
361    { return removeAllCommentNodes(page, dp.start, dp.end + 1); }
362
363    /**
364     * Takes a sub-section of an HTML {@code Vector} and removes all {@code CommentNode} present
365     * 
366     * @param page Any HTML page
367     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
368     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
369     * 
370     * @return The number of HTML {@code CommentNode's} that were removed
371     * 
372     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
373     * 
374     * @see CommentNode
375     * @see #removeNodesOPT(Vector, int[])
376     */
377    public static int removeAllCommentNodes(Vector<HTMLNode> page, int sPos, int ePos)
378    {
379        IntStream.Builder   b       = IntStream.builder();
380        LV                  l       = new LV(page, sPos, ePos);
381
382        // Use Java-Streams to build the list of nodes that are valid comment-nodes.
383        for (int i=l.start; i < l.end; i++)
384            if (page.elementAt(i).isCommentNode())
385                b.add(i);
386
387        // Build the stream and convert it to an int[] (integer-array)
388        int[]               posArr  = b.build().toArray();
389
390        // The integer array is guaranteed to be sorted, and contain valid vector-indices.
391        removeNodesOPT(page, posArr);
392
393        return posArr.length; 
394    }
395
396
397    // ***************************************************************************************
398    // ***************************************************************************************
399    // TextNode Modification Operations - "Escape Text Nodes"
400    // ***************************************************************************************
401    // ***************************************************************************************
402
403
    /** Convenience Method.  Invokes {@link #escapeTextNodes(Vector, int, int)}. */
405    public static int escapeTextNodes(Vector<HTMLNode> html)
406    { return escapeTextNodes(html, 0, -1); }
407
    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #escapeTextNodes(Vector, int, int)} */
409    public static int escapeTextNodes(Vector<HTMLNode> html, DotPair dp)
410    { return escapeTextNodes(html, dp.start, dp.end + 1); }
411
412    /**
413     * Will call {@code HTML.Escape.replaceAll} on each {@code TextNode} in the range of
414     * {@code sPos ... ePos}
415     * 
416     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
417     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
418     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
419     * 
420     * @return The number of {@code TextNode's} that changed as a result of the
421     * {@code Escape.replaceAll(n.str)} loop.
422     * 
423     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
424     * 
425     * @see Escape#replaceAll(String)
426     */
427    public static int escapeTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
428    {
429        LV          l       = new LV(html, sPos, ePos);
430        HTMLNode    n       = null;
431        String      s       = null;
432        int         counter = 0;
433
434        for (int i=l.start; i < l.end; i++)
435            if ((n = html.elementAt(i)).isTextNode())
436                if (! (s = Escape.replace(n.str)).equals(n.str))
437                {
438                    html.setElementAt(new TextNode(s), i);
439                    counter++;
440                }
441        return counter;
442    }
443
444
445    // ***************************************************************************************
446    // ***************************************************************************************
447    // Clone HTML Vectors
448    // ***************************************************************************************
449    // ***************************************************************************************
450
451
    /** Convenience Method.  Invokes {@link #cloneRange(Vector, int, int)} */
453    public static Vector<HTMLNode> clone(Vector<? extends HTMLNode> html)
454    { return cloneRange(html, 0, -1); }
455
    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #cloneRange(Vector, int, int)} */
457    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, DotPair dp)
458    { return cloneRange(html, dp.start, dp.end + 1); }
459
460    /**
461     * Copies (clones!) a sub-range of the HTML page, stores the results in a {@code Vector}, and
462     * returns it.
463     * 
464     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
465     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
466     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
467     * 
468     * @return The "cloned" (copied) sub-range specified by {@code 'sPos'} and {@code 'ePos'.}
469     * 
470     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
471     * 
472     * @see #cloneRange(Vector, DotPair)
473     */
474    public static Vector<HTMLNode> cloneRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
475    {
476        LV                  l   = new LV(html, sPos, ePos);
477        Vector<HTMLNode>    ret = new Vector<>(l.end - l.start);
478
479        // Copy the range specified into the return vector
480        for (int i = l.start; i < l.end; i++) ret.addElement(html.elementAt(i));
481
482        return ret;
483    }
484
485
486    // ***************************************************************************************
487    // ***************************************************************************************
488    // Remove All Inner Tags
489    // ***************************************************************************************
490    // ***************************************************************************************
491
492
493    /** Convenience Method.  Invokes {@link #removeAllInnerTags(Vector, int, int)} */
494    public static int removeAllInnerTags(Vector<HTMLNode> html)
495    { return removeAllInnerTags(html, 0, -1); }
496
497    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #removeAllInnerTags(Vector, int, int)} */
498    public static int removeAllInnerTags(Vector<? super TagNode> html, DotPair dp)
499    { return removeAllInnerTags(html, dp.start, dp.end + 1); }
500
501    /**
502     * This method removes all inner-tags (all attributes) from every {@code TagNode} inside of an
503     * HTML page.  It does this by replacing every {@code TagNode} in the {@code Vector} with the
504     * pre-instantiated, publicly-available {@code TagNode} which can be obtained by a call to the
505     * class {@code HTMLTags.hasTag(token, TC)}.
506     * 
507     * <BR /><BR /><B>NOTE:</B> This method determines whether a fresh {@code TagNode} is to be
508     * inserted by measuring the length of the internal {@code TagNode.str} (a {@code String})
509     * field. If {@code TagNode.str.length()} is not equal to the HTML token {@code TagNode.tok}
510     * length <B>plus 2</B>, then a fresh, pre-instantiated, node is replaced.  The {@code '+2'}
511     * figure comes from the additional characters {@code '<'} and {@code '>'} that start and end
512     * every HTML {@code TagNode}
513     * 
514     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
515     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
516     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
517     * 
     * @return The number of {@code TagNode} elements that were replaced with zero-attribute
     * HTML Element Tags.
520     * 
521     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
522     */
523    public static int removeAllInnerTags(Vector<? super TagNode> html, int sPos, int ePos)
524    {
525        Object o;   TagNode tn;
526
527        int ret = 0;
528        LV  l   = new LV(sPos, ePos, html);
529
530        for (int i = (l.end-1); i >= l.start; i--)                  // Iterate the Loop-Variable
531            if ((o = html.elementAt(i)) instanceof TagNode)         // Only TagNode's have Inner-Tags
532                if (! (tn = (TagNode) o).isClosing)                 // Only "Opening TagNodes" have attributes
533                    if (tn.str.length() > (tn.tok.length() + 2))    // <TOK> *CANNOT* have Inner-Tags...
534                    {
535                        ret++;
536                        html.setElementAt(HTMLTags.hasTag(tn.tok, TC.OpeningTags), i);
537                        // HTMLTags.hasTag(tok, TC) gets an empty and pre-instantiated TagNode,
538                        // where TagNode.tok == 'tn.tok' and TagNode.isClosing = false
539                    }
540
541        return ret;
542    }
543
544
545    // ***************************************************************************************
546    // ***************************************************************************************
547    // String Length of the TextNode's
548    // ***************************************************************************************
549    // ***************************************************************************************
550
551
552    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #textStrLength(Vector, int, int)} */
553    public static int textStrLength(Vector<? extends HTMLNode> html, DotPair dp)
554    { return textStrLength(html, dp.start, dp.end + 1); }
555
556    /** Convenience Method.  Invokes {@link #textStrLength(Vector, int, int)} */
557    public static int textStrLength(Vector<? extends HTMLNode> html)
558    { return textStrLength(html, 0, -1); }
559
560    /**
561     * This method will return the length of the strings <I><B>contained by all/only instances of
562     * {@code 'TextNode'}</B></I> among the nodes of the input HTML-{@code Vector}.   This is
563     * identical to the behavior of the method with the same name, but includes starting and ending
564     * bounds on the html {@code Vector}: {@code 'sPos'} &amp; {@code 'ePos'}.
565     * 
566     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
567     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
568     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
569     * 
570     * @return The sum of the lengths of the text contained by text-nodes in the {@code Vector} 
571     * between {@code 'sPos'} and {@code 'ePos'}.
572     * 
573     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
574     */
575    public static int textStrLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
576    {
577        HTMLNode    n;
578        int         sum = 0;
579        LV          l   = new LV(html, sPos, ePos);
580
581        // Counts the length of each "String" in a "TextNode" between sPos and ePos
582        for (int i=l.start; i < l.end; i++)
583            if ((n = html.elementAt(i)).isTextNode())
584                sum += n.str.length();
585
586        return sum;
587    }
588
589
590    // ***************************************************************************************
591    // ***************************************************************************************
592    // Compact Adjacent / Adjoining TextNode's
593    // ***************************************************************************************
594    // ***************************************************************************************
595
596
597    /** Convenience Method.  Invokes {@link #compactTextNodes(Vector, int, int)} */
598    public static int compactTextNodes(Vector<HTMLNode> html)
599    { return compactTextNodes(html, 0, html.size()); }
600
601    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #compactTextNodes(Vector, int, int)} */
602    public static int compactTextNodes(Vector<HTMLNode> html, DotPair dp)
603    { return compactTextNodes(html, dp.start, dp.end + 1); }     
604
605    /**
     * Occasionally, when removing instances of {@code TagNode} from a vectorized-html
     * page, certain instances of {@code TextNode} which were not adjacent / neighbours in
     * the {@code Vector}, all of a sudden become adjacent.  Although there are no major problems
     * with contiguous instances of {@code TextNode} from the Search Algorithm's perspective,
     * for programmers, it can sometimes be befuddling to realize that the output text that
     * is returned from a call to {@code Util.pageToString(html)} is not being found because
     * the text that is left is broken amongst multiple instances of adjacent TextNodes.
613     *
614     * <BR /><BR />This method merely combines "Adjacent" instances of {@code class TextNode}
615     * in the {@code Vector} into single instances of {@code class TextNode}
616     *
     * @param html Any vectorized-html web-page.  If this page contains any contiguously placed
     * {@code TextNode's}, the extras will be eliminated, and the internal-strings inside the
     * nodes ({@code TextNode.str}) will be combined.  This action will reduce the size of the
     * actual html-{@code Vector}.
621     * 
622     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
623     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
624     * 
625     * @return The number of nodes that were eliminated after being combined, or 0 if there
626     * were no text-nodes that were removed.
627     * 
628     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
629     * 
630     * @see HTMLNode#str
631     * @see TextNode
632     */
633    public static int compactTextNodes(Vector<HTMLNode> html, int sPos, int ePos)
634    {
635        LV      l           = new LV(html, sPos, ePos);
636        boolean compacting  = false;
637        int     firstPos    = -1;
638        int     delta       = 0;
639
640        for (int i=l.start; i < (l.end - delta); i++)
641            if (html.elementAt(i).isTextNode())         // Is a TextNode
642            {
643                if (compacting) continue;               // Not in "Compacting Mode"
644                compacting = true;  firstPos = i;       // Start "Compacting Mode" - this is a TextNode
645            }
646            else if (compacting && (firstPos < (i-1)))  // Else - Must be a TagNode or CommentNode
647            {
648                // Save compacted TextNode String's into this StringBuilder
649                StringBuilder compacted = new StringBuilder();
650
651                // Iterate all TextNodes that were adjacent, put them together into StringBuilder
652                for (int j=firstPos; j < i; j++) compacted.append(html.elementAt(j).str);
653
654                // Place this new "aggregate TextNode" at location of the first TextNode that
655                // was compacted into this StringBuilder
656                html.setElementAt(new TextNode(compacted.toString()), firstPos);
657
658                // Remove the rest of the positions in the Vector that had TextNode's.  These have
659                // all been put together into the "Aggregate TextNode" at position "firstPos"
660                Util.removeRange(html, firstPos + 1, i);
661
662                // The change in the size of the Vector needs to be accounted for.
663                delta += (i - firstPos - 1);
664
665                // Change the loop-counter variable, too, since the size of the Vector has changed.
666                i = firstPos + 1;
667
668                // Since we just hit a CommentNode, or TagNode, exit "Compacting Mode."
669                compacting = false;
670
671            } else compacting = false;
672                // NOTE: This, ALSO, MUST BE a TagNode or CommentNode (just like the previous
673                //       if-else branch !)
674                // TRICKY: Don't forget this 'else' !
675
676        // Added - Don't forget the case where the Vector ends with a series of TextNodes
677        // TRICKY TOO! (Same as the HTML Parser... The ending or 'trailing' nodes must be parsed
678        int lastNodePos = html.size() - 1;
679        if (html.elementAt(lastNodePos).isTextNode()) if (compacting && (firstPos < lastNodePos))
680        {
681            StringBuilder compacted = new StringBuilder();
682
683            // Compact the TextNodes that were identified at the end of the Vector range.
684            for (int j=firstPos; j <= lastNodePos; j++) compacted.append(html.elementAt(j).str);
685
686            // Replace the group of TextNode's at the end of the Vector, with the single, aggregate
687            html.setElementAt(new TextNode(compacted.toString()), firstPos);
688            Util.removeRange(html, firstPos + 1, lastNodePos + 1);
689        }
690
691        return delta;
692    }
693
694
695    // ***************************************************************************************
696    // ***************************************************************************************
697    // Count New Lines
698    // ***************************************************************************************
699    // ***************************************************************************************
700
701
702    /** Convenience Method.  Invokes {@link #countNewLines(Vector, int, int)} */
703    public static int countNewLines(Vector<? extends HTMLNode> html)
704    { return countNewLines(html, 0, -1); }
705
706    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #countNewLines(Vector, int, int)} */
707    public static int countNewLines(Vector<? extends HTMLNode> html, DotPair dp)
708    { return countNewLines(html, dp.start, dp.end + 1); }
709
710
711    /**
712     * This will count the number of new-line symbols present <B><I>- on the partial HTML
713     * page</I></B>. The count will include a sum of every {@code HTMLNode.str} that
714     * contains the standard new-line symbols: {@code \r\n, \r, \n}, meaning that UNIX, MSFT,
715     * Apple, etc. forms of text-line rendering should all be treated equally.
716     * 
717     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
718     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
719     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
720     * 
721     * @return The number of new-line characters in all of the {@code HTMLNode's} that occur
722     * between vectorized-page positions {@code 'sPos'} and {@code 'ePos.'}
723     * 
724     * <BR /><BR /><B>NOTE:</B> The regular-expression used here 'NEWLINEP' is as follows:
725     * 
726     * <DIV CLASS="SNIP">{@code
727     * private static final Pattern NEWLINEP = Pattern.compile("\\r\\n|\\r|\\n");
728     * }</DIV>
729     * 
730     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
731     * 
732     * @see StringParse#NEWLINEP
733     */
734    public static int countNewLines(Vector<? extends HTMLNode> html, int sPos, int ePos)
735    {
736        int newLineCount    = 0;
737        LV  l               = new LV(html, sPos, ePos);
738
739        for (int i=l.start; i < l.end; i++)
740
741                // Uses the Torello.Java.StringParse "New Line RegEx"
742                for (   Matcher m = StringParse.NEWLINEP.matcher(html.elementAt(i).str);
743                        m.find();
744                        newLineCount++);
745
746        return newLineCount;
747    }
748
749
750    // ***************************************************************************************
751    // ***************************************************************************************
752    // Count TextNode's
753    // ***************************************************************************************
754    // ***************************************************************************************
755
756
    /** Convenience Method.  Invokes {@link #countTextNodes(Vector, int, int)} */
758    public static int countTextNodes(Vector<HTMLNode> page)
759    { return countTextNodes(page, 0, -1); }
760
    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #countTextNodes(Vector, int, int)} */
762    public static int countTextNodes(Vector<HTMLNode> page, DotPair dp)
763    { return countTextNodes(page, dp.start, dp.end + 1); }
764
765    /**
766     * Counts the number of {@code TextNode's} in a {@code Vector<HTMLNode>} between the demarcated
767     * array / {@code Vector} positions, {@code 'sPos'} and {@code 'ePos'}
768     * 
769     * @param page Any HTML page.
770     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
771     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
772     * 
773     * @return The number of {@code TextNode's} in the {@code Vector} between the demarcated
774     * indices.
775     * 
776     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
777     */
778    public static int countTextNodes(Vector<HTMLNode> page, int sPos, int ePos)
779    {
780        int counter = 0;
781        LV  l       = new LV(page, sPos, ePos);
782
783        // Iterates the entire page between sPos and ePos, incrementing the count for every
784        // instance of text-node.
785        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTextNode()) counter++;
786
787        return counter;
788    }
789
790
791    // ***************************************************************************************
792    // ***************************************************************************************
793    // Count CommentNode's
794    // ***************************************************************************************
795    // ***************************************************************************************
796
797
798    /** Convenience Method.  Invokes {@link #countCommentNodes(Vector, int, int)} */
799    public static int countCommentNodes(Vector<HTMLNode> page)
800    { return countCommentNodes(page, 0, -1); }
801
802    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #countCommentNodes(Vector, int, int)} */
803    public static int countCommentNodes(Vector<HTMLNode> page, DotPair dp)
804    { return countCommentNodes(page, dp.start, dp.end + 1); }
805
806    /**
807     * Counts the number of {@code CommentNode's} in an {@code Vector<HTMLNode>} between the
808     * demarcated array / {@code Vector} positions.
809     * 
810     * @param page Any HTML page.
811     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
812     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
813     * 
814     * @return The number of {@code CommentNode's} in the {@code Vector} between the demarcated
815     * indices.
816     * 
817     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
818     */
819    public static int countCommentNodes(Vector<HTMLNode> page, int sPos, int ePos)
820    {
821        int counter = 0;
822        LV  l       = new LV(page, sPos, ePos);
823
824        // Iterates the entire page between sPos and ePos, incrementing the count for every
825        // instance of comment-node.
826        for (int i=l.start; i < l.end; i++)
827             if (page.elementAt(i).isCommentNode())
828                counter++;
829
830        return counter;
831    }
832
833
834    // ***************************************************************************************
835    // ***************************************************************************************
836    // Count TagNode's
837    // ***************************************************************************************
838    // ***************************************************************************************
839
840
841    /** Convenience Method.  Invokes {@link #countTagNodes(Vector, int, int)} */
842    public static int countTagNodes(Vector<HTMLNode> page)
843    { return countTagNodes(page, 0, -1); }
844
845    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #countTagNodes(Vector, int, int)} */
846    public static int countTagNodes(Vector<HTMLNode> page, DotPair dp)
847    { return countTagNodes(page, dp.start, dp.end + 1); }
848
849    /**
850     * Counts the number of {@code TagNode's} in a {@code Vector<HTMLNode>} between the demarcated 
851     * array / {@code Vector} positions.
852     * 
853     * @param page Any HTML page.
854     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
855     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
856     * 
857     * @return The number of {@code TagNode's} in the {@code Vector}.
858     * 
859     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
860     */
861    public static int countTagNodes(Vector<HTMLNode> page, int sPos, int ePos)
862    {
863        int counter = 0;
864        LV  l       = new LV(page, sPos, ePos);
865
866        // Iterates the entire page between sPos and ePos, incrementing the count for every
867        // instance of TagNode.
868        for (int i=l.start; i < l.end; i++) if (page.elementAt(i).isTagNode()) counter++;
869
870        return counter;
871    }
872
873
874    // ***************************************************************************************
875    // ***************************************************************************************
876    // String-Length Operations
877    // ***************************************************************************************
878    // ***************************************************************************************
879
880
881    /** Convenience Method.  Invokes {@link #strLength(Vector, int, int)} */
882    public static int strLength(Vector<? extends HTMLNode> html)
883    { return strLength(html, 0, -1); }
884
885    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #strLength(Vector, int, int)} */
886    public static int strLength(Vector<? extends HTMLNode> html, DotPair dp)
887    { return strLength(html, dp.start, dp.end + 1); }
888
889    /**
890     * This method simply adds / sums the {@code String}-length of every {@code HTMLNode.str }
891     * field in the passed page-{@code Vector}.  It only counts nodes between parameters
892     * {@code sPos} (inclusive) and {@code ePos} (exclusive).
893     * 
894     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
895     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
896     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
897     * 
898     * @return The total length <B><I>- in characters -</I></B> of the sub-page of HTML between
899     * {@code 'sPos'} and {@code 'ePos'}
900     * 
901     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
902     * 
903     * @see #strLength(Vector)
904     */
905    public static int strLength(Vector<? extends HTMLNode> html, int sPos, int ePos)
906    {
907        int ret = 0;
908        LV  l   = new LV(html, sPos, ePos);
909
910        for (int i=l.start; i < l.end; i++) ret += html.elementAt(i).str.length();
911
912        return ret;
913    }
914
915
916    // ***************************************************************************************
917    // ***************************************************************************************
918    // Hash-Code Operations
919    // ***************************************************************************************
920    // ***************************************************************************************
921
922
923    /** Convenience Method.  Invokes {@link #hashCode(Vector, int, int)} */
924    public static int hashCode(Vector<? extends HTMLNode> html)
925    { return hashCode(html, 0, -1); }
926
927    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #hashCode(Vector, int, int)} */
928    public static int hashCode(Vector<? extends HTMLNode> html, DotPair dp)
929    { return hashCode(html, dp.start, dp.end + 1); }
930
931    /**
932     * Generates a hash-code for a vectorized html page-{@code Vector}.
933     * 
934     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
935     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
936     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
937     * 
938     * @return Returns the {@code String.hashCode()} of the <I><B>partial HTML-page</B></i> as if
939     * it were not being stored as a {@code Vector}, but rather as HTML inside of a
940     * Java-{@code String}.
941     * 
942     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
943     * 
944     * @see #hashCode(Vector)
945     */
946    public static int hashCode(Vector<? extends HTMLNode> html, int sPos, int ePos)
947    {
948        int h   = 0;
949        LV  lv  = new LV(html, sPos, ePos);
950
951        for (int j=lv.start; j < lv.end; j++)
952        {
953            String  s = html.elementAt(j).str;
954            int     l = s.length();
955
956            // This line has been copied from the jdk8/jdk8 "String.hashCode()" method.
957            // The difference is that it iterates over the entire vector
958            for (int i=0; i < l; i++) h = 31 * h + s.charAt(i);
959        }
960
961        return h;
962    }
963
964
965    // ***************************************************************************************
966    // ***************************************************************************************
967    // Style-Node & Script-Node Block Removal Operations
968    // ***************************************************************************************
969    // ***************************************************************************************
970
971
972    /**
973     * Removes all HTML {@code 'style'} Node blocks.
974     * 
975     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
976     * 
977     * @return The number of {@code <STYLE>}-Node Blocks that were removed
978     * 
979     * @see TagNodeRemoveInclusive#first(Vector, String[])
980     */
981    public static int removeStyleNodeBlocks(Vector<? extends HTMLNode> html)
982    {
983        int removeCount = 0;
984        while (TagNodeRemoveInclusive.first(html, "style") > 0) removeCount++;
985        return removeCount;
986    }
987
988    /**
989     * Removes all {@code 'script'} Node blocks.
990     * 
991     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
992     * 
993     * @return The number of {@code SCRIPT}-Node Blocks that were removed
994     * 
995     * @see TagNodeRemoveInclusive#first(Vector, String[])
996     */
997    public static int removeScriptNodeBlocks(Vector<? extends HTMLNode> html)
998    {
999        int removeCount = 0;
1000        while (TagNodeRemoveInclusive.first(html, "script") > 0) removeCount++;
1001        return removeCount;
1002    }
1003
1004
1005    // ***************************************************************************************
1006    // ***************************************************************************************
1007    // JSON Script Nodes
1008    // ***************************************************************************************
1009    // ***************************************************************************************
1010
1011    /** Convenience Method.  Invokes {@link #getJSONScriptBlocks(Vector, int, int)} */
1012    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html)
1013    { return getJSONScriptBlocks(html, 0, -1); }
1014
1015    /** Convenience Method.  Accepts {@code DotPair}.  Invokes {@link #getJSONScriptBlocks(Vector, int, int)} */
1016    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, DotPair dp)
1017    { return getJSONScriptBlocks(html, dp.start, dp.end + 1); }
1018
1019    /**
     * This method shall search for any and all {@code <SCRIPT TYPE="json">}
     * <I>JSON TEXT</I> {@code </SCRIPT>} blocks present in a range of Vectorized HTML.  The
     * search method shall simply look for the token {@code "JSON"} in the {@code TYPE} attribute
     * of each and every {@code <SCRIPT> TagNode} that is found on the page.  The contents of
     * such blocks <I>are not checked for validity, nor are they even guaranteed to be
     * {@code JSON} data!</I>
1026     * 
1027     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1028     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1029     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1030     * 
1031     * @return This will return a {@code java.util.stream.Stream<String>} of each of the 
1032     * {@code JSON} elements present in the specified range of the Vectorized HTML passed to
1033     * parameter {@code 'html'}.
1034     * 
1035     * <EMBED CLASS="external-html" DATA-FILE-ID="STRMCNVT">
1036     * 
1037     * @see StrTokCmpr#containsIgnoreCase(String, Predicate, String)
1038     * @see Util#rangeToString(Vector, int, int)
1039     */
1040    public static Stream<String> getJSONScriptBlocks(Vector<HTMLNode> html, int sPos, int ePos)
1041    {
1042        // Whenever building lists, it is usually easiest to use a Stream.Builder
1043        Stream.Builder<String> b = Stream.builder();
1044
1045        // This Predicate simply tests that if the substring "json" (CASE INSENSITIVE) is found
1046        // in the TYPE attribute of a <SCRIPT TYPE=...> node, that the token-string is, indeed a
1047        // word - not a substring of some other word.  For instance: TYPE="json" would PASS, but
1048        // TYPE="rajsong" would FAIL - because the token string is not surrounded by white-space
1049
1050        final Predicate<String> tester = (String s) ->
1051            StrTokCmpr.containsIgnoreCase(s, (Character c) -> ! Character.isLetterOrDigit(c), "json");
1052
1053        // Find all <SCRIPT> node-blocks whose "TYPE" attribute abides by the tester String-predicate
1054        // named above.
1055        Vector<DotPair> jsonDPList = InnerTagFindInclusive.all
1056            (html, sPos, ePos, "script", "type", tester);
1057
1058        // Convert each of these DotPair element into a java.lang.String
1059        // Add the String to the Stream.Builder<String>
1060        for (DotPair jsonDP : jsonDPList)
1061            if (jsonDP.size() > 2)
1062                b.accept(Util.rangeToString(html, jsonDP.start + 1, jsonDP.end));
1063
1064        // Build the Stream, and return it.
1065        return b.build();
1066    }
1067
1068    // ***************************************************************************************
1069    // ***************************************************************************************
1070    // MISC
1071    // ***************************************************************************************
1072    // ***************************************************************************************
1073
1074
1075    /**
1076     * Inserts nodes, and allows a 'varargs' parameter.
1077     * 
1078     * @param html Any HTML Page
1079     * 
1080     * @param pos The position in the original {@code Vector} where the nodes shall be inserted.
1081     * 
1082     * @param nodes A list of nodes to insert.
1083     */
1084    public static void insertNodes(Vector<HTMLNode> html, int pos, HTMLNode... nodes)
1085    {
1086        Vector<HTMLNode> nodesVec = new Vector<>(nodes.length);
1087        for (HTMLNode node : nodes) nodesVec.addElement(node);
1088        html.addAll(pos, nodesVec);
1089    }
1090
1091    /**
1092     * <SPAN STYLE="color: red;"><B>OPT: Optimized</B></SPAN>
1093     * 
1094     * <BR /><BR />This method does the same thing as {@link #removeNodes(boolean, Vector, int[])}
1095     * but all error checking is skipped, and the input integer array is presumed to have
1096     * been sorted. There are no guarantees about the behavior of this method if the input array
1097     * {@code 'posArr'} is not sorted, <I>least-to-greatest,</I> or if there are duplicate or
1098     * negative values in this array.
1099     * 
1100     * <BR /><BR /><B>NOTE:</B> If the var-args input integer-array parameter is empty, this method
1101     * shall exit gracefully, and immediately.
1102     * 
1103     * @param page Any HTML-Page, usually ones generated by {@code HTMLPage.getPageTokens(...)},
1104     * but these may be obtained or created in any fashion so necessary.
1105     * 
1106     * @param posArr An array of integers which list/identify the nodes in the page to be removed.
1107     * Because this implementation has been optimized, no error checking will be performed on this
1108     * input.  It is presumed to be sorted, least-to-greatest, and that all values in the array are
1109     * valid-indices into the vectorized-html parameter {@code 'page'}
1110     */
1111    public static <T extends HTMLNode> void removeNodesOPT(Vector<T> page, int... posArr)
1112    {
1113        if (posArr.length == 0) return;
1114
1115        int endingInsertPos = page.size() - posArr.length;
1116        int posArrIndex     = 0;
1117        int insertPos       = posArr[0];
1118        int retrievePos     = posArr[0];
1119
1120        // There is very little that can be documented about these two loops.  Took 3 hours
1121        // to figure out.  Read the variables names for "best documentation"
1122
1123        while (insertPos < endingInsertPos)
1124        {
1125            // This inner-loop is necessary for when the posArr has consecutive-elements that
1126            // are *ALSO* consecutive-pointers.
1127            //
1128            // For instance, this invokation:
1129            // Util.removeNodes(page, 4, 5, 6); ...
1130            //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1131            //
1132            // For this invokation: 
1133            // Util.removeNodes(page, 2, 4, 6); 
1134            //      the inner-loop is not entered.
1135
1136            while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex]))
1137            { retrievePos++; posArrIndex++; }
1138
1139            page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1140        }
1141
1142        // Remove all remaining elements in the tail of the array.
1143        page.setSize(page.size() - posArr.length);
1144    }
1145
1146
1147    /**
     * This method removes each {@code HTMLNode} from the passed-parameter {@code 'page'}
     * listed/identified by the input array {@code 'nodeList'}.
1150     * 
1151     * <BR /><BR /><B>NOTE:</B> If the var-args input integer-array parameter is empty, this method
1152     * shall exit gracefully, and immediately.
1153     * 
     * @param preserveInputArray This is a convenience input parameter that allows a programmer to
     * "preserve" the original input-parameter integer-array that is passed to this method.  It
     * could be argued this parameter is "superfluous" - however, keep in mind that the passed
     * parameter {@code 'nodeList'} <B><I>must be sorted</I></B> before this method is able to
     * function properly. There is a sort that's performed within the body of this method.  In
     * case the original order of the integer-array input-parameter must be preserved, it is
     * possible to request that the sort operate on "a clone" of the input-parameter
     * integer-array, instead of the original integer-array {@code 'nodeList'} itself.
1162     * 
1163     * @param page Any HTML-Page, usually ones generated by {@code HTMLPage.getPageTokens(...)},
1164     * but these may be obtained or created in any fashion so necessary.
1165     * 
1166     * @param nodeList An array of integers which list/identify the nodes in the page to be
1167     * removed.
1168     * 
1169     * @throws IllegalArgumentException If the {@code 'nodeList'} contains duplicate entries.
1170     * Obviously, no {@code HTMLNode} may be removed from the {@code Vector<HTMLNode>} more than
1171     * once.
1172     * 
1173     * @throws IndexOutOfBoundsException If the nodeList contains index-pointers / items that are
1174     * not within the bounds of the passed HTML-Page {@code Vector}.
1175     */
1176    public static <T extends HTMLNode> void removeNodes
1177        (boolean preserveInputArray, Vector<T> page, int... nodeList)
1178    {
1179        if (nodeList.length == 0) return;
1180
1181        // @Safe Var Args
1182        int[]   posArr  = preserveInputArray ? nodeList.clone() : nodeList;
1183        int     len     = posArr.length;
1184
1185        Arrays.sort(posArr);
1186
1187        // Check for duplicates in the nodeList, no HTMLNode may be removed twice!
1188        for (int i=0; i < (len - 1); i++)
1189            if (posArr[i] == posArr[i+1]) throw new IllegalArgumentException(
1190                "The input array contains duplicate items, this is not allowed.\n" +
1191                "This is since each array-entry is intended to be a pointer/index for items to " +
1192                "be removed.\nNo item can possibly be removed twice.!"
1193            );
1194
1195        // Make sure all nodes are within the bounds of the original Vector.  (no negative indexes,
1196        // no indexes greater than the size of the Vector)
1197        if ((posArr[0] < 0) || (posArr[len - 1] >= page.size()))
1198            throw new IndexOutOfBoundsException (
1199                "The input array contains entries which are not within the bounds of the " +
1200                "original-passed Vector.\nHTMLPage Vector has: " + page.size() + " elements.\n" +
1201                "Maximum element in the nodeList is [" + posArr[len - 1] + "], and the minimum " +
1202                "element is: [" + posArr[0] + "]"
1203            );
1204
1205        int endingInsertPos = page.size() - posArr.length;
1206        int posArrIndex     = 0;
1207        int insertPos       = posArr[0];
1208        int retrievePos     = posArr[0];
1209
1210        // There is very little that can be documented about these two loops.  Took 3 hours
1211        // to figure out.  Read the variables names for "best documentation"
1212
1213        while (insertPos < endingInsertPos)
1214        {
1215            // This inner-loop is necessary for when the posArr has consecutive-elements that
1216            // are *ALSO* consecutive-pointers.
1217            //
1218            // For instance, this invocation:
1219            // Util.removeNodes(page, 4, 5, 6);
1220            //      where 4, 5, and 6 are consecutive - the inner while-loop is required.
1221            //
1222            // For this invocation: 
1223            // Util.removeNodes(page, 2, 4, 6);
1224            //      the inner-loop is not entered.
1225            while ((posArrIndex < posArr.length) && (retrievePos == posArr[posArrIndex])) 
1226            { retrievePos++; posArrIndex++; }
1227
1228            page.setElementAt(page.elementAt(retrievePos++), insertPos++);
1229        }
1230
1231        // Remove all remaining elements in the tail of the array.
1232        page.setSize(page.size() - posArr.length);
1233    }
1234
    /** Convenience Method.  Invokes {@link #replaceRange(Vector, int, int, Vector)} */
1236    public static void replaceRange
1237        (Vector<HTMLNode> page, DotPair range, Vector<HTMLNode> newNodes)
1238    { replaceRange(page, range.start, range.end+1, newNodes); }
1239
1240    /**
     * Replaces any and all {@code HTMLNode's} located between the {@code Vector} locations
     * {@code 'sPos'} (inclusive) and {@code 'ePos'} (exclusive).  By exclusive, this means that
     * the {@code HTMLNode} located at position {@code 'ePos'} <B><I>will not</I></B> be replaced,
     * but the one at {@code 'sPos'} <I><B>is replaced</B></I>.
     * 
     * <BR /><BR />The size of the {@code Vector} will change by {@code newNodes.size() - 
     * (ePos - sPos)}.  The contents situated between {@code Vector} location {@code sPos} and
     * {@code sPos + newNodes.size()} will, indeed, be the contents of the {@code 'newNodes'}
     * parameter.
1250     * 
1251     * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
1252     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1253     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1254     * @param newNodes Any Java HTML page-{@code Vector} of {@code HTMLNode}.
1255     * 
1256     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1257     * 
1258     * @see #pollRange(Vector, int, int)
1259     * @see #removeRange(Vector, int, int)
1260     * @see #replaceRange(Vector, DotPair, Vector)
1261     */
1262    public static void replaceRange
1263        (Vector<HTMLNode> page, int sPos, int ePos, Vector<HTMLNode> newNodes)
1264    {
1265        // Torello.Java.LV
1266        LV l = new LV(sPos, ePos, page);
1267
1268        int oldSize     = ePos - sPos;
1269        int newSize     = newNodes.size();
1270        int insertPos   = sPos;
1271        int i           = 0;
1272
1273        while ((i < newSize) && (i < oldSize))
1274            page.setElementAt(newNodes.elementAt(i++), insertPos++);
1275
1276        if (newSize == oldSize) return;
1277
1278        if (newSize < oldSize)  // The new Vector is SMALLER than the old sub-range
1279                                // The rest of the nodes just need to be trashed
1280            Util.removeRange(page, insertPos, ePos);
1281        else                    // The new Vector is BIGGER than the old sub-range
1282                                // There are still more nodes to insert.
1283            page.addAll(ePos, newNodes.subList(i, newSize));
1284    }
1285
1286    /**
1287     * Java's {@code java.util.Vector} class does not allow public access to the
1288     * {@code removeRange(start, end)} function.  It is protected in Java's Documentation about
1289     * the {@code Vector} class.  This method does exactly that, nothing else.
1290     * 
1291     * @param page Any Java HTML page, constructed of {@code HTMLNode (TagNode & TextNode)}
1292     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1293     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1294     * 
1295     * @return the number of nodes removed.
1296     * 
1297     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1298     * 
1299     * @see #pollRange(Vector, int, int)
1300     * @see #removeRange(Vector, DotPair)
1301     */
1302    public static <T extends HTMLNode> int removeRange(Vector<T> page, int sPos, int ePos)
1303    {
1304        // Torello.Java.LV
1305        LV  l   = new LV(sPos, ePos, page);
1306
1307        // Shift the nodes in position Vector[l.end through page.size()] to vector-position
1308        // Vector[l.start]
1309        int end = page.size() - l.end - 1;
1310
1311        for (int i=0; i <= end; i++) page.setElementAt(page.elementAt(l.end + i), l.start + i);
1312
1313        // Number of nodes to remove
1314        int numToRemove = l.end - l.start;
1315
1316        // Remove the tail - all nodes starting at:
1317        // vector-position[page.size() - (l.end - l.start)]
1318        page.setSize(page.size() - numToRemove);
1319
1320        return numToRemove;
1321    }
1322
1323    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #removeRange(Vector, int, int)} */
1324    public static int removeRange(Vector<? extends HTMLNode> html, DotPair dp)
1325    { return removeRange(html, dp.start, dp.end + 1); }
1326
1327    /**
1328     * Java's {@code java.util.Vector} class does not allow public access to the
1329     * {@code removeRange(start, end)} function.  It is listed as {@code 'protected'} in Java's
1330     * Documentation about the {@code class Vector.}  This method upstages that, and performs the
1331     * {@code 'Poll'} operation, where the nodes are first removed, stored, and then return as a
1332     * function result.
1333     * 
1334     * <BR /><BR /><B>FURTHERMORE:</B> The nodes that are removed are placed in a separate return
1335     * {@code Vector}, and returned as a result to this method.
1336     * 
1337     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1338     * @param sPos <EMBED CLASS="external-html" DATA-FILE-ID="SPOSVEC">
1339     * @param ePos <EMBED CLASS="external-html" DATA-FILE-ID="EPOSVEC">
1340     * 
1341     * @return A complete list ({@code Vector<HTMLNode>}) of the nodes that were removed.
1342     * 
1343     * @throws IndexOutOfBoundsException <EMBED CLASS="external-html" DATA-FILE-ID="VIOOBEX">
1344     * 
1345     * @see #removeRange(Vector, int, int)
1346     * @see #removeRange(Vector, DotPair)
1347     * @see #pollRange(Vector, DotPair)
1348     */
1349    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, int sPos, int ePos)
1350    {
1351        LV                  l   = new LV(html, sPos, ePos);
1352        Vector<HTMLNode>    ret = new Vector<HTMLNode>(l.end - l.start);
1353
1354        // Copy the elements from the input vector into the return vector
1355        for (int i=l.start; i < l.end; i++) ret.add(html.elementAt(i));
1356
1357        // Remove the range from the input vector (this is the meaning of 'poll')
1358        Util.removeRange(html, sPos, ePos);
1359
1360        return ret;
1361    }
1362
1363    /** Convenience Method.  Receives {@code DotPair} and Invokes {@link #pollRange(Vector, int, int)}. */
1364    public static Vector<HTMLNode> pollRange(Vector<? extends HTMLNode> html, DotPair dp)
1365    { return pollRange(html, dp.start, dp.end + 1); }
1366
1367    /**
1368     * This removes every element from the {@code Vector} beginning at position 0, all the way to
1369     * position {@code 'pos'} (exclusive).  The {@code elementAt(pos)} remains in the original page
1370     * input-{@code Vector}.  This is the definition of 'exclusive'.
1371     * 
1372     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1373     * 
1374     * @param pos Any position within the range of the input {@code Vector}.
1375     * 
1376     * @return The elements in the {@code Vector} from position: {@code 0 ('zero')} all the way to
1377     * position: {@code 'pos'}
1378     */
1379    public static Vector<HTMLNode> split(Vector<? extends HTMLNode> html, int pos)
1380    { return pollRange(html, 0, pos); }
1381
1382    /**
1383     * Removes the first and last element of a vectorized-HTML web-page, or sub-page.  Generally,
1384     * this could be used to remove the surrounding tag's {@code '<DIV>' ... '</DIV>'}, or
1385     * something similar.
1386     * 
1387     * <BR /><BR /><SPAN STYLE="color: red;"><B>IMPORTANT:</B></SPAN> This method <B>WILL NOT
1388     * CHECK</B> whether there are matching HTML open-and-close tags at the end beginning and end
1389     * of this sub-section.  Generally, though, that is how this method may be used.
1390     * 
1391     * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1392     * 
1393     * @throws IllegalArgumentException If the {@code Vector} has fewer than two elements.
1394     */
1395    public static void removeFirstLast(Vector<? extends HTMLNode> html)
1396    {
1397        int size = html.size();
1398
1399        if (size < 2) throw new IllegalArgumentException(
1400            "You have requested that the first and last elements the input 'page' parameter (a vector) be removed.  " +
1401            "However, the vector size is only [" + size  + "], so this cannot be performed."
1402        );
1403
1404        // NOTE: *** This removes elementAt(0) and elementAt(size-1)
1405        //       *** NOT ALL ELEMENTS BETWEEN 0 and (size-1)
1406        Util.removeNodesOPT(html, 0, size-1);
1407    }
1408
1409
1410    // ***************************************************************************************
1411    // ***************************************************************************************
1412    // Inclusive 
1413    // ***************************************************************************************
1414    // ***************************************************************************************
1415
1416
1417    /**
1418     * <CODE>Util.Inclusive Documentation</CODE><BR /><BR />
1419     * <EMBED CLASS="external-html" DATA-FILE-ID="UTILINCL">
1420     */
1421    @Torello.HTML.Tools.JavaDoc.StaticFunctional
1422    public static class Inclusive
1423    {
        // Private, empty constructor.  Every method declared in this class is static.
        private Inclusive() { }
1425
1426        // ***************************************************************************************
1427        // ***************************************************************************************
1428        // Inclusive Find/Get
1429        // ***************************************************************************************
1430        // ***************************************************************************************
1431
1432        /**
1433         * This finds the closing HTML {@code 'TagNode'} match for a given opening
1434         * {@code 'TagNode'} in a given-input html page or sub-section.
1435         *
1436         * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1437         *
1438         * @param nodeIndex An index into that {@code Vector}.  This index must point to an
1439         * {@code HTMLNode} element that is:
1440         *
1441         * <BR /><BR /><OL CLASS="JDOL">
1442         * <LI>An instance of {@code TagNode}</LI>
1443         * <LI>A {@code TagNode} whose {@code 'isClosing'} field is <B>FALSE</B></LI>
1444         * <LI>Is not a {@code 'singleton'} HTML element-token
1445         * (i.e. {@code <IMG>, <BR>, <H1>, etc...})
1446         * </LI>
1447         * </OL>
1448         *
1449         * @return An "inclusive search" finds {@code OpeningTag} and {@code ClosingTag} pairs - 
1450         * <I>and returns all the elements between them in the contents of a 
1451         * return-{@code Vector}, or {@code Vector DotPair}-end-point value</I>.  This method
1452         * will take a particular node of a {@code Vector}, and (as long it has a match) 
1453         * find it's <I><B>closing {@code HTMLNode} match.</B></I>  The integer returned will
1454         * be the index into this page of the closing, matching {@code TagNode.}
1455         *
1456         * @throws TagNodeExpectedException If the node in the {@code Vector}-parameter
1457         * {@code 'html'} contained at index {@code 'nodeIndex'} is not an instance of
1458         * {@code TagNode}, then this exception is thrown.
1459         *
1460         * @throws OpeningTagNodeExpectedException If the node in the {@code Vector}-parameter 
1461         * {@code 'html'} at index {@code 'nodeIndex'} is a closing version of the HTML element,
1462         * then this exception shall throw.
1463         *
1464         * @throws InclusiveException If the node in {@code Vector}-parameter {@code 'html'},
1465         * pointed-to by index {@code 'nodeIndex'} is an HTML {@code 'Singleton'} / Self-Closing
1466         * Tag, then this exception will be thrown.
1467         *
1468         * @see TagNode
1469         * @see TagNode#tok
1470         * @see TagNode#isClosing
1471         * @see HTMLNode
1472         */
1473        public static int find(Vector<? extends HTMLNode> html, int nodeIndex)
1474        {
1475            TagNode     tn          = null;
1476            HTMLNode    n           = null;
1477            String      tok         = null;
1478
1479            if (! html.elementAt(nodeIndex).isTagNode())
1480                throw new TagNodeExpectedException (
1481                    "You have attempted to find a closing tag to match an opening one, " +
1482                    "but the 'nodeIndex' (" + nodeIndex + ") you have passed doesn't contain " +
1483                    "an instance of TagNode."
1484                );
1485            else tn = (TagNode) html.elementAt(nodeIndex);
1486
1487            if (tn.isClosing) throw new OpeningTagNodeExpectedException(
1488                "The TagNode indicated by 'nodeIndex' = " + nodeIndex + " has its 'isClosing' " +
1489                "boolean as TRUE - this is not an opening TagNode, but it must be to continue."
1490            );
1491
1492            // Checks to ensure this token is not a 'self-closing' or 'singleton' tag.
1493            // If it is an exception shall throw.
1494            tok = tn.tok;
1495            InclusiveException.check(tok);
1496
1497            int         end         = html.size();
1498            int         openCount   = 1;
1499
1500            for (int pos = nodeIndex; pos < end; pos++)
1501                if ((n = html.elementAt(pos)).isTagNode())
1502                    if ((tn = ((TagNode) n)).tok.equals(tok))
1503                    {
1504                        openCount += tn.isClosing ? -1 : 1;
1505                        if (openCount == 0) return pos;
1506                    }
1507
1508            return -1;
1509        }
1510
1511        /**
1512         * Convenience Method.  Invokes {@link #find(Vector, int)}.
1513         * <BR /><BR />Converts output to <B><CODE>'GET'</CODE></B> format ({@code Vector}-sublist),
1514         * using {@link #cloneRange(Vector, int, int)}
1515         */
1516        public static Vector<HTMLNode> get(Vector<? extends HTMLNode> html, int nodeIndex)
1517        { 
1518            int endPos = find(html, nodeIndex);
1519            return (endPos == -1) ? null : cloneRange(html, nodeIndex, endPos + 1);
1520        }
1521
1522        /**
1523         * Convenience Method.  Invokes {@link #find(Vector, int)}.
1524         * <BR /><BR />Converts output to <B><CODE>'PEEK'</CODE></B> format ({@code SubSection}),
1525         * using {@link #cloneRange(Vector, int, int)}
1526         */
1527        public static SubSection peek(Vector<? extends HTMLNode> html, int nodeIndex)
1528        {
1529            int endPos = find(html, nodeIndex);
1530
1531            return (endPos == -1) ? null : new SubSection(
1532                new DotPair(nodeIndex, endPos),
1533                cloneRange(html, nodeIndex, endPos + 1)
1534            );
1535        }
1536
1537        /**
1538         * Convenience Method.  Invokes {@link #find(Vector, int)}.
1539         * <BR /><BR />Converts output to <B><CODE>'POLL'</CODE></B> format ({@code Vector}-sublist),
1540         * using {@link #pollRange(Vector, int, int)}.  Removes Sub-List.
1541         */
1542        public static Vector<HTMLNode> poll(Vector<? extends HTMLNode> html, int nodeIndex)
1543        {
1544            int endPos = find(html, nodeIndex);
1545            return (endPos == -1) ? null : pollRange(html, nodeIndex, endPos + 1);
1546        }
1547
1548        /**
1549         * Convenience Method.  Invokes {@link #find(Vector, int)}.
1550         * <BR /><BR />Converts output to <B><CODE>'REMOVE'</CODE></B> format ({@code int} - number
1551         * of nodes removed),  using {@link #removeRange(Vector, int, int)}.  Removes Sub-List.
1552         */
1553        public static int remove(Vector<? extends HTMLNode> html, int nodeIndex)
1554        {
1555            int endPos = find(html, nodeIndex);
1556            return (endPos == -1) ? 0 : removeRange(html, nodeIndex, endPos + 1);
1557        }
1558
1559        // ***************************************************************************************
1560        // ***************************************************************************************
1561        // Optimized Methods, Inclusive Find/Get/Subsection
1562        // ***************************************************************************************
1563        // ***************************************************************************************
1564
1565        /**
1566         * Convenience Method.  Invokes {@link #dotPairOPT(Vector, int)}.
1567         * <BR /><BR />Converts output to {@code Vector<HTMLNode>}.
1568         */
1569        public static Vector<HTMLNode> vectorOPT(Vector<? extends HTMLNode> html, int tagPos)
1570        {
1571            DotPair dp = dotPairOPT(html, tagPos);
1572            if (dp == null) return null;
1573            else            return Util.cloneRange(html, dp.start, dp.end + 1);
1574        }
1575
1576        /**
1577         * Convenience Method.  Invokes {@link #dotPairOPT(Vector, int)}.
1578         * <BR /><BR />Converts output to {@code SubSection}. 
1579         */
1580        public static SubSection subSectionOPT(Vector<? extends HTMLNode> html, int tagPos)
1581        {
1582            DotPair dp = dotPairOPT(html, tagPos);
1583            if (dp == null) return null;
1584            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
1585        }
1586
1587        /**
1588         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILIOPT">
1589         * <!-- Inclusive Opt Description -->
1590         * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1591         * @param tagPos <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTTP">
1592         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
1593         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTJSN"> 
1594         * <!-- Note on JS-DOM Tree innerHTML -->
1595         * @see TagNode
1596         * @see TagNode#isClosing
1597         * @see TagNode#tok
1598         * @see DotPair
1599         */
1600        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos)
1601        {
1602            // Temp Variables
1603            HTMLNode n;     TagNode tn;     int openCount = 1;
1604
1605            int len = html.size();
1606
1607            // This is the name (token) of the "Opening HTML Element", we are searching for
1608            // the matching, closing element
1609            String tok = ((TagNode) html.elementAt(tagPos)).tok;
1610
1611            for (int i = (tagPos+1); i < len; i++)
1612                if ((n = html.elementAt(i)).isTagNode())
1613                    if ((tn = (TagNode) n).tok.equals(tok))
1614                    {
1615                        // This keeps a "Depth Count" - where "depth" is just the number of 
1616                        // opened tags, for which a matching, closing tag hasn't been found yet.
1617                        openCount += (tn.isClosing ? -1 : 1);
1618
1619                        // When all open-tags of the specified HTML Element 'tok' have been
1620                        // found, search has finished.
1621                        if (openCount == 0) return new DotPair(tagPos, i);
1622                    }
1623
1624            // Was not found
1625            return null;
1626        }
1627
1628        /**
1629         * Convenience Method.  Invokes {@link #dotPairOPT(Vector, int, int)}.
1630         * <BR /><BR />Converts output to {@code Vector<HTMLNode>}.
1631         */
1632        public static Vector<HTMLNode> vectorOPT
1633            (Vector<? extends HTMLNode> html, int tagPos, int end)
1634        {
1635            DotPair dp = dotPairOPT(html, tagPos, end);
1636            if (dp == null) return null;
1637            else            return Util.cloneRange(html, dp.start, dp.end + 1);
1638        }
1639
1640        /**
1641         * Convenience Method.  Invokes {@link #dotPairOPT(Vector, int, int)}.
1642         * <BR /><BR />Converts output to {@code SubSection}.
1643        */
1644        public static SubSection subSectionOPT
1645            (Vector<? extends HTMLNode> html, int tagPos, int end)
1646        {
1647            DotPair dp = dotPairOPT(html, tagPos, end);
1648            if (dp == null) return null;
1649            else            return new SubSection(dp, Util.cloneRange(html, dp.start, dp.end + 1));
1650        }
1651
1652        /**
1653         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILIOPT">
1654         * <!-- Inclusive Opt Description -->
1655         * @param html <EMBED CLASS="external-html" DATA-FILE-ID="HTMLVEC">
1656         * @param tagPos <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTTP">
1657         * @param end <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTEND">
1658         * @return A <B>'DotPair'</B> version of an inclusive, end-to-end HTML tag-element.
1659         * <EMBED CLASS="external-html" DATA-FILE-ID="UTILOPTJSN">
1660         * <!-- Note on JS-DOM Tree innerHTML -->
1661         * @see TagNode
1662         * @see TagNode#isClosing
1663         * @see TagNode#tok
1664         * @see DotPair
1665         */
1666        public static DotPair dotPairOPT(Vector<? extends HTMLNode> html, int tagPos, int end)
1667        {
1668            // Temp Variables
1669            HTMLNode n;     TagNode tn;     int openCount = 1;      int endPos;
1670
1671            // This is the name (token) of the "Opening HTML Element", we are searching for
1672            // the matching, closing element
1673            String tok = ((TagNode) html.elementAt(tagPos)).tok;
1674
1675            for (endPos = (tagPos+1); endPos < end; endPos++)
1676                if ((n = html.elementAt(endPos)).isTagNode())
1677                    if ((tn = (TagNode) n).tok.equals(tok))
1678                    {
1679                        // This keeps a "Depth Count" - where "depth" is just the number of
1680                        // opened tags, for which a matching, closing tag hasn't been found yet.
1681                        openCount += (tn.isClosing ? -1 : 1);
1682
1683                        // System.out.print(".");
1684
1685                        // When all open-tags of the specified HTML Element 'tok' have been
1686                        // found, search has finished.
1687                        if (openCount == 0) return new DotPair(tagPos, endPos);
1688                    }
1689
1690            // The end of the vectorized-html page (or subsection) was reached, but the
1691            // matching-closing element was not found.
1692            return null; // assert(endPos == html.size());
1693        }
1694    }
1695}