View Javadoc

1   /*
2    * $Id: OASTStringLocator.java,v 1.8 2005/06/01 17:38:37 jlerner Exp $
3    *
4    * Copyright (c) 1999-2004, BBN Technologies, LLC.
5    * All rights reserved.
6    * http://www.daml.org/legal/opensource/bbn_license.html
7    */
8   package com.bbn.swede.core.dom;
9   
10  import com.bbn.swede.core.OWLCore;
11  
12  /***
13   * Traverses an OAST to find exact start offsets and lengths for its nodes.
14   * This class is intended for use after a full-file parse, since locating
15   * nodes against a file stream while SAX is still in the middle of parsing
16   * from the same file is incredibly slow.
17   * @author jlerner
18   */
19  public class OASTStringLocator
20  {
21     /***
22      * The full text to use for locating nodes.  This should be the real text
23      * for the parse, not the dummy text used to fool the SAX parser into parsing
24      * part of a document.
25      */
26     protected String _sText;
27     /***
28      * Creates an OAST string locator to run against a string representation of
29      * the document.
30      * @param sText A string containing the entire portion of the OAST to be
31      *              located.
32      */
33     public OASTStringLocator(String sText)
34     {
35        _sText = sText;
36     }
37  
38     /***
39      * <p>Locates the nodes in an OAST subtree.  The method used varies by node 
40      * type - the top-level OASTNode subclasses TagNode, AttributeNode, and 
41      * Literal are used as a rough cut to determine which helper method will 
42      * be invoked to do the work.</p>
43      *
44      * <p>Do not invoke locate() on a subtree that contains unparseable nodes.  It
45      * is only designed for use after a successful parse and its behavior in the
46      * presence of invalid XML fragments is undefined.</p>
47      * @param root The root of the tree to locate.
48      * @see #locateAttribute(AttributeNode)
49      * @see #locateBegin(TagNode)
50      * @see #locateEnd(TagNode)
51      * @see #locateLiteral(Literal)
52      * @see #locateRoot(OASTNode)
53      */
54     public void locate(OASTNode root)
55     {
56        if (root instanceof TagNode)
57        {
58           locateBegin((TagNode) root);
59           locateChildren(root);
60           locateEnd((TagNode) root);
61        }
62        else if (root instanceof AttributeNode)
63        {
64           locateAttribute((AttributeNode) root);
65        }
66        else if (root instanceof Literal)
67        {
68           locateLiteral((Literal) root);
69        }
70        else
71        {
72           locateRoot(root);
73           locateChildren(root);
74        }
75        //Don't need to worry about unparseable nodes here.  OASTLocator will
76        //only be invoked after a successful parse, so no invalid XML fragments
77     }
78  
79     /***
80      * Recursive helper method for locate(OASTNode).  Calls locate(OASTNode) on 
81      * each of a node's children.
82      * @param root The node whose children must be located.
83      */
84     private void locateChildren(OASTNode root)
85     {
86        OASTNode[] children = root.getChildren();
87        for (int i = 0; i < children.length; i++)
88        {
89           locate(children[i]);
90        }
91     }
92  
93     /***
94      * Finds the start offset of a tag node.  The offset is determined by
95      * searching forward from the start offset of <code>tag</code>'s parent node.
96      * @param tag The tag node to locate.
97      */
98     protected void locateBegin(TagNode tag)
99     {
100       int iOffset;
101       OASTNode nodePrev = tag.getParent().getPreviousChild(tag);
102       if (nodePrev != null)
103       {
104          iOffset = nodePrev.getOffset() + nodePrev.getLength();
105       }
106       else
107       {
108          OASTNode parent = tag.getParent();
109          iOffset = parent.getOffset();
110          if (parent.getParent() != null && parent.getParent().getParent() != null)
111 //         if (parent.getQName().equals(tag.getQName()))
112          {
113             iOffset += parent.getQName().length() + 1;
114          }
115       }
116       iOffset = _sText.indexOf("<" + tag.getQName(), iOffset);
117       tag.setOffset(iOffset);
118    }
119 
120    /***
121     * Finds the length of a tag node.  The length is determined by searching
122     * forward from the just-past-the-end offset of <code>tag</code>'s last
123     * child.
124     * @param tag The tag node to locate.
125     */
126    protected void locateEnd(TagNode tag)
127    {
128       int iOffset;
129       OASTNode[] children = tag.getChildren();
130       if (children != null && children.length > 0)
131       {
132          OASTNode nodeLast = children[children.length - 1];
133          iOffset = nodeLast.getOffset() + nodeLast.getLength();
134       }
135       else
136       {
137          iOffset = tag.getOffset() + tag.getQName().length();
138       }
139       int iPosSingleton = _sText.indexOf("/>", iOffset);
140       int iPosEnd = _sText.indexOf("</" + tag.getQName(), iOffset);
141       if (iPosEnd >= 0 && (iPosSingleton < 0 || iPosEnd < iPosSingleton))
142       {
143          iOffset = _sText.indexOf(">", iPosEnd) + 1;
144       }
145       else
146       {
147          iOffset = iPosSingleton + 2;
148          tag.setBeginRegion(iOffset - tag.getOffset());
149          tag.setEndRegion(iOffset);
150          //should force middle/end regions to be null
151          tag.setLength(iOffset - tag.getOffset());
152          return;
153       }
154 
155       tag.setLength(iOffset - tag.getOffset());
156 
157       //Partition the node
158       AttributeNode att = tag.getLastAttribute();
159       iOffset =
160          (att == null
161             ? tag.getOffset() + tag.getQName().length() + 1
162             : att.getOffset() + att.getLength());
163       int iEnd = _sText.indexOf(">", iOffset) + 1;
164       tag.setBeginRegion(iEnd - tag.getOffset());
165       int iBegin =
166          _sText
167             .substring(tag.getOffset(), tag.getOffset() + tag.getLength())
168             .lastIndexOf("<");
169       tag.setEndRegion(tag.getOffset() + iBegin);
170    }
171 
172    /***
173     * Finds the start offset and length of an attribute node.  The offset is
174     * found by searching forward from the just-past-the-end offset of
175     * <code>node</code>'s parent's previous child (or its parent's start offset,
176     * if <code>node</code> is its first child), and the length is determined by
177     * searching forward from the located start offset.
178     * @param node The attribute node to locate.
179     */
180    protected void locateAttribute(AttributeNode node)
181    {
182       OASTNode parent = node.getParent();
183       OASTNode prev = parent.getPreviousChild(node);
184       int iOffset =
185          _sText.indexOf(
186             node.getQName(),
187             prev == null
188                ? parent.getOffset()
189                : prev.getOffset() + prev.getLength());
190       node.setOffset(iOffset);
191 
192       int iPosSingle = _sText.indexOf("\'", iOffset);
193       int iPosDouble = _sText.indexOf("\"", iOffset);
194       if (iPosDouble >= 0 && (iPosSingle < 0 || iPosDouble < iPosSingle))
195       {
196          iOffset = _sText.indexOf("\"", iPosDouble + 1) + 1;
197       }
198       else if (iPosSingle >= 0)
199       {
200          iOffset = _sText.indexOf("\'", iPosSingle + 1) + 1;
201       }
202       else
203       {
204          OWLCore.logWarning(
205             OWLCore.getID(),
206             "The impossible has happened.",
207             new Exception());
208       }
209       node.setLength(iOffset - node.getOffset());
210 
211       //partition the attribute
212       int iValPos =
213          _sText.substring(
214             node.getOffset(),
215             node.getOffset() + node.getLength()).indexOf("=");
216       node.setValueRegion(node.getOffset() + iValPos + 1);
217    }
218 
219    /***
220     * Finds the start offset and length of a literal node.  Since literals, by
221     * definition, fill the entire space between the opening and closing tag
222     * of their parent, the literal is located based on the offset of the first
223     * '&gt;' beyond the parent's start offset and the first '&lt;' after that.
224     * @param lit The literal node to locate.
225     */
226    protected void locateLiteral(Literal lit)
227    {
228       OASTNode parent = lit.getParent();
229       OASTNode prev = parent.getPreviousChild(lit);
230       int iOffset;
231       if (prev != null)
232       {
233          iOffset = prev.getOffset() + prev.getLength();
234       }
235       else
236       {
237          iOffset = parent.getOffset();
238       }
239       if (prev == null || prev instanceof AttributeNode)
240       {
241          iOffset = _sText.indexOf(">", parent.getOffset()) + 1;
242       }
243       lit.setOffset(iOffset);
244       iOffset = _sText.indexOf("<", iOffset);
245       if (iOffset < 0)
246       {
247          iOffset = _sText.length();
248       }
249       lit.setLength(iOffset - lit.getOffset());
250    }
251 
252    /***
253     * Sets the start offset and length of the root node.  The root offset is
254     * always zero, and the length is the length of the string provided at
255     * construction.  
256     * @param root The root node to locate.
257     */
258    protected void locateRoot(OASTNode root)
259    {
260       root.setOffset(0);
261       root.setLength(_sText.length());
262    }
263 
264 }