//*********************************************************************************
// class tig.xml.DomDoc
// Software released under the General Public License (version 2 or later), available at
// http://www.gnu.org/copyleft/gpl.html
//*********************************************************************************
package tig.xml;
import tig.GeneralConstants;
import tig.Strings;
import org.w3c.dom.*;
import java.util.regex.*;
import java.util.Vector;
/******************************************************************************
Wraps a <B><CODE>org.w3c.dom.Document</CODE></B>, providing easy access to the document's contents.
<BR>Usage of this class is based on the notion of <B>current path</B> of an element, which describes the path leading from the root node down to the element's parent : the names of the element's ancestors, separated by "/".
<BR>For example, with the XML structure :
<PRE>
&lt;rootNode&gt;
  &lt;elt1&gt;
    &lt;elt2&gt;
      &lt;elt3/&gt;
    &lt;/elt2&gt;
  &lt;/elt1&gt;
&lt;/rootNode&gt;
</PRE>
the current path of elt3 is "<CODE>rootNode/elt1/elt2</CODE>".
<BR>So you specify which part of the document you want to access with <CODE>setCurrentPath()</CODE>, and then
use the other methods.
<BR>If the path is not set, it is considered as "", which is the path to root node.
So the results are obtained for the whole document.
@author Thierry Graff
@history apr 13 2002 : creation.
@todo homogeneity of return between getAttributeValue and getAttributesValues (null Exception)
@todo shorten test methods.
@todo find a way to introduce regExp in getAttributesValues(String, String[]).
@todo use Node.getNodeName() for currentPath???
*********************************************************************************/
public class DomDoc implements GeneralConstants{

    //=================================================================================
    //                                  CONSTANTS
    //=================================================================================

    /** Separator between two ancestor names in a path. */
    private final static String SLASH = "/";
    /** Wildcard as typed by the caller in {@link #REGEX_WILDCARD_ONLY} mode. */
    private final static String WILDCARD = "*";
    /** Regular expression fragment substituted to {@link #WILDCARD} in {@link #REGEX_WILDCARD_ONLY} mode. */
    private final static String POINT_WILDCARD = ".*";

    /** Constant indicating that regular expressions (in {@link #getAttributesValues(String,String)})
        are interpreted the standard way.
        <BR>Used with {@link #setRegExMode(int)}. */
    public static final int REGEX_NORMAL = 0;

    /** Constant indicating that regular expressions (in {@link #getAttributesValues(String,String)})
        are modified before being interpreted : wildcards ('*') are changed to '.*'. This mode permits to consider
        that the '*' is equivalent to any sequence of characters. Used with {@link #setRegExMode(int)}.
        <BR>Only the '*' is rewritten ; the resulting string is still compiled as a regular expression.
        <BR><B>This is the default mode.</B>
    */
    public static final int REGEX_WILDCARD_ONLY = 1;

    //=================================================================================
    //                              INSTANCE VARIABLES
    //=================================================================================

    /** The wrapped DOM document. */
    private Document _doc;
    /** The current path ; {@link GeneralConstants#BLANK} designates the whole document. */
    private String _curPath;
    /** One of the <CODE>REGEX_XXX</CODE> constants. */
    private int _regExMode;

    //=================================================================================
    //                                  CONSTRUCTORS
    //=================================================================================

    /** Unique constructor. The current path is initialized to the root of the document and the
        regular expression mode to {@link #REGEX_WILDCARD_ONLY}.
        @param doc the DOM Document used to handle internally the data.
    */
    public DomDoc(Document doc){
        _doc = doc;
        _curPath = BLANK;
        _regExMode = REGEX_WILDCARD_ONLY;
    }// end constructor

    //=================================================================================
    //                                  METHODS
    //=================================================================================

    //*************** get / set currentPath ***************
    /** Sets the "current path". See class documentation for more information.
        @param path the new current path ; it is stored without any check.
    */
    public void setCurrentPath(String path){ _curPath = path; }

    /** Returns the "current path". See class documentation for more information. */
    public String getCurrentPath(){ return _curPath; }

    //*************** get / set Document ***************
    /** Sets the Document that this <CODE>DomDoc</CODE> is wrapping.
        The current path is set to empty String (root node).
        @param doc the new document to wrap.
    */
    public void setDocument(Document doc){
        _doc = doc;
        _curPath = BLANK;
    }

    /** Returns the Document that this <CODE>DomDoc</CODE> is wrapping. */
    public Document getDocument(){ return _doc; }

    //*************** setRegExMode ***************
    /** Sets the "regular expression mode" ; see <CODE>REGEX_XXX</CODE> constants.
        <BR>The value is stored without validation ; any value different from
        {@link #REGEX_WILDCARD_ONLY} leaves the expressions untouched.
    */
    public void setRegExMode(int regExMode){ _regExMode = regExMode; }

    //*************** getAttributeValue ***************
    /** Method to get the value of an attribute of a tag of current path.
        <BR>Convenient if the document contains <B>one and only one</B> tag 'tagName' ; this tag must have
        an attribute 'attributeName'. Written to directly get a String.
        <BR>Example :
        <PRE>
        &lt;myXmlDoc&gt;
            ...
            &lt;theTag theAttribute = "theValue"&gt;
            ...
        &lt;/myXmlDoc&gt;</PRE>
        The call <B><CODE>getAttributeValue("theTag", "theAttribute")</CODE></B> will return "theValue".
        <BR><BR>This method is sensitive to last call to {@link #setCurrentPath(String)}
        <BR><BR>To retrieve attribute values of several tags, or several attribute values of a tag, use
        {@link #getAttributesValues(String,String[])}.
        @param tagName name of the tag to look for.
        @param attributeName name of the attribute to read on that tag.
        @return The value of the specified attribute if one is found. If the current path contains
        several tags 'tagName', <I>only the first one encountered is considered</I> (the following ones
        are not examined, even if the first one doesn't carry the attribute).
        <BR>Returns null in the following cases :
        <LI>if 'tagName' or 'attributeName' is null ;</LI>
        <LI>if no tag 'tagName' belongs to the current path ;</LI>
        <LI>if the first tag found has no attribute 'attributeName'.</LI>
    */
    public String getAttributeValue(String tagName, String attributeName){
        if (tagName == null || attributeName == null) return null;

        NodeList candidates = _doc.getElementsByTagName(tagName);
        int nbCandidates = candidates.getLength();
        // The loop is bounded by the list length : NodeList.item() returns null past the end,
        // and belongsToPath() rejects null nodes with an exception.
        for(int i = 0; i < nbCandidates; i++){
            Node tag = candidates.item(i);
            if(!belongsToPath(tag, _curPath)) continue;
            // First tag of the current path : the answer comes from this one only.
            NamedNodeMap attributes = tag.getAttributes();
            if(attributes == null) return null;
            Node attribute = attributes.getNamedItem(attributeName);
            return attribute == null ? null : attribute.getNodeValue();
        }
        // No tag 'tagName' in the current path.
        return null;
    }// end getAttributeValue

    //*************** getAttributesValues(String, String[]) ***************
    /** Method to get the values of tags attributes. Return results for tags of the current path.
        <BR>This method can be used to retrieve the attribute values of tags named 'tagName'.
        <BR>Example :
        <PRE>
        &lt;myXmlDoc&gt;
            ...
            &lt;theTag attr1 = "val1.1" attr2 = "val1.2" attr3="val1.3"/&gt;
            &lt;theTag attr1 = "val2.1" attr3="val2.3"/&gt;
            &lt;theTag attr1 = "val3.1" attr2 = "val3.2" attr3="val3.3"/&gt;
            ...
        &lt;/myXmlDoc&gt;
        </PRE>
        The code :
        <PRE>
        String[] attributeNames = new String[]{"attr1", "attr2"};
        getAttributesValues("theTag", attributeNames);
        </PRE>
        will return a String[][] containing :
        <PRE>
        {
            {"val1.1", "val1.2"},
            {"val2.1", null},
            {"val3.1", "val3.2"}
        }
        </PRE>
        @param tagName name of the tags to look for.
        @param attributeNames names of the attributes to read on each tag ; the columns of the result
        follow the order of this array.
        @return the list as specified above or null if the current path doesn't contain 'tagName' tags.
        <BR>If attributes are missing, null is put to the corresponding place of the resulting array.
        <BR>Returns null if parameter 'tagName' is null, if 'attributeNames' is null or empty
        @throws Exception declared for compatibility with existing callers ; the body itself throws
        no checked exception.
    */
    public String[][] getAttributesValues(String tagName, String[] attributeNames) throws Exception{
        if (tagName == null || attributeNames == null || attributeNames.length == 0) return null;

        // method : get all the 'tagName' tags ; keep one row per tag of the current path
        NodeList candidates = _doc.getElementsByTagName(tagName);
        int nbCandidates = candidates.getLength();
        int nbAttrs = attributeNames.length;
        Vector rows = new Vector();
        for(int i = 0; i < nbCandidates; i++){
            Node tag = candidates.item(i);
            if(!belongsToPath(tag, _curPath)) continue;
            String[] row = new String[nbAttrs]; // slots of missing attributes stay null
            NamedNodeMap attributes = tag.getAttributes();
            if(attributes != null){
                for(int k = 0; k < nbAttrs; k++){
                    // A null name can't match anything ; don't hand it to the DOM implementation.
                    if(attributeNames[k] == null) continue;
                    Node attribute = attributes.getNamedItem(attributeNames[k]);
                    if(attribute != null) row[k] = attribute.getNodeValue();
                }
            }
            rows.add(row);
        }
        if(rows.isEmpty()) return null;
        return (String[][])rows.toArray(new String[rows.size()][]);
    }// end getAttributesValues(String, String[])

    //*************** getAttributesValues(String, String) ***************
    /** Equivalent to {@link #getAttributesValues(String,String[])} when 'attributeNames' contains only one element,
        or when a <I>regular expression</I> is used to designate the attribute names.
        <BR>The most common use of regular expressions is to use a wildcard ('*') to replace any sequence of character.
        For more details, see documentation of class <CODE>java.util.regex.Pattern</CODE>, and
        {@link #setRegExMode(int)} for the way the expression is interpreted.
        <BR>WARNING : the returned array is not filled with 'null' if the match is not found, like in
        {@link #getAttributesValues(String,String[])} ; each row only contains the values of the attributes
        whose name matches, so rows may have different lengths (possibly 0).
        <BR>Example :
        <PRE>
        &lt;myXmlDoc&gt;
            ...
            &lt;theTag attr1 = "val1.1" attr2 = "val1.2" attr3="val1.3"/&gt;
            &lt;theTag attr1 = "val2.1" attr3="val2.3"/&gt;
            &lt;theTag attr1 = "val3.1" attr2 = "val3.2" attr3="val3.3"/&gt;
            ...
        &lt;/myXmlDoc&gt;
        </PRE>
        The code :
        <PRE>
        getAttributesValues("theTag", "attr*");
        </PRE>
        will return a String[][] containing :
        <PRE>
        {
            {"val1.1", "val1.2", "val1.3"},
            {"val2.1", "val2.3"},
            {"val3.1", "val3.2", "val3.3"}
        }
        </PRE>
        @param tagName name of the tags to look for.
        @param attributeNames expression that the <B>whole</B> attribute name must match.
        @return one row per tag 'tagName' of the current path, or null if there is no such tag,
        or if 'tagName' or 'attributeNames' is null.
        @throws java.util.regex.PatternSyntaxException if the expression can't be compiled.
    */
    public String[][] getAttributesValues(String tagName, String attributeNames){
        if (tagName == null || attributeNames == null) return null;

        // modify the expression depending on regExMode
        String regEx = attributeNames;
        if(_regExMode == REGEX_WILDCARD_ONLY)
            regEx = Strings.replace(attributeNames, WILDCARD, POINT_WILDCARD);
        Pattern pattern = Pattern.compile(regEx);

        // method : get all the 'tagName' tags ; keep one row per tag of the current path
        NodeList candidates = _doc.getElementsByTagName(tagName);
        int nbCandidates = candidates.getLength();
        Vector rows = new Vector();
        for(int i = 0; i < nbCandidates; i++){
            Node tag = candidates.item(i);
            if(!belongsToPath(tag, _curPath)) continue;
            Vector values = new Vector();
            NamedNodeMap attributes = tag.getAttributes();
            int nbAttrs = (attributes == null) ? 0 : attributes.getLength();
            for(int j = 0; j < nbAttrs; j++){
                Node attribute = attributes.item(j);
                // keep the value only if the attribute name matches the pattern
                if(pattern.matcher(attribute.getNodeName()).matches())
                    values.add(attribute.getNodeValue());
            }
            rows.add(values.toArray(new String[values.size()]));
        }
        if(rows.isEmpty()) return null;
        return (String[][])rows.toArray(new String[rows.size()][]);
    }// end getAttributesValues(String, String)

    //=================================================================================
    //                                  STATIC METHODS
    //=================================================================================

    //********************************************************
    // METHOD TO PUT ELSEWHERE
    /** Throws an <CODE>Exception</CODE> carrying the given message. Currently not called. */
    private static void sendError(String str) throws Exception{
        throw new Exception(str);
    }

    //********************************************************
    /** Returns a String representing the path of a node.
        <BR>The String is built with the names of the parent nodes separated by "/", the outermost
        ancestor coming first. The node itself is not part of its path, neither is the top-most
        ancestor (the one that has no parent).
        @param node the node whose path is computed.
        @return the path ; an empty path if the node has no parent or if its parent has no parent.
        @throws IllegalArgumentException if 'node' is null.
    */
    public static String getPath(Node node){
        if(node == null) throw new IllegalArgumentException("getPath can't be called with a null node");
        Node ancestor = node.getParentNode(); // start to build the path with the parent node
        if(ancestor == null) return BLANK;
        StringBuffer path = new StringBuffer();
        // Walk up to the top ; each name found is put in front of the names of its descendants.
        while(ancestor.getParentNode() != null){
            if(path.length() > 0) path.insert(0, SLASH);
            path.insert(0, ancestor.getNodeName());
            ancestor = ancestor.getParentNode();
        }
        return path.toString();
    }// end getPath

    //********************************************************
    /** Indicates if a node belongs to a given path, that is if the path of the node (see
        {@link #getPath(Node)}) starts with all the elements of 'path'.
        <BR>Every node belongs to the empty path.
        @param node the node to test.
        @param path the path, using "/" as separator.
        @throws IllegalArgumentException if 'node' or 'path' is null.
    */
    public static boolean belongsToPath(Node node, String path){
        if(node == null) throw new IllegalArgumentException("belongsToPath can't be called with a null node");
        if(path == null) throw new IllegalArgumentException("belongsToPath can't be called with a null path");
        if(path.equals(BLANK)) return true;
        // Transform the paths to arrays.
        String[] aPath = Strings.stringToStringArray(path, '/');
        String[] aNodePath = Strings.stringToStringArray(getPath(node), '/');
        if (aNodePath.length < aPath.length) return false;
        // 'path' must be a prefix of the node's path ; stop at first difference.
        for(int i = 0; i < aPath.length; i++){
            if(!aPath[i].equals(aNodePath[i])) return false;
        }
        return true;
    }// end belongsToPath

    //=================================================================================
    //=================================================================================
    //                                      TESTS
    //=================================================================================
    //=================================================================================

    // **************** For tests only ****************
    /** Command line entry point of the manual tests.
        @param args { test name, xml file, tag name, attribute name(s) }
    */
    public static void main(String[] args){
        String possibleArgs = "'testGetAttributeValue' or 'testGetAttributesValues' or 'testGetAttributesValues2'";
        if(args.length < 4){
            System.out.println("4 arguments needed : test name, xml file, tag name, attribute name(s)");
            System.out.println("first argument must be " + possibleArgs);
            return;
        }
        if(args[0].equalsIgnoreCase("testGetAttributeValue"))
            testGetAttributeValue(args[1], args[2], args[3]);
        else if(args[0].equalsIgnoreCase("testGetAttributesValues"))
            testGetAttributesValues(args[1], args[2], args[3]);
        else if(args[0].equalsIgnoreCase("testGetAttributesValues2"))
            testGetAttributesValues2(args[1], args[2], args[3]);
        else
            System.out.println("first argument must be " + possibleArgs);
    }// end main

    // **************** For tests only ****************
    //D:\Programs\java\jdk1.4\bin\java -classpath .;bin tig.xml.DomDoc testGetAttributeValue test.xml trail name
    /** Prints the result of {@link #getAttributeValue(String,String)} applied to a file. */
    private static void testGetAttributeValue(String xmlFile, String tagName, String attrName){
        try{
            DomDoc domDoc = loadForTest(xmlFile);
            System.out.println(domDoc.getAttributeValue(tagName, attrName));
        }
        catch(Exception e){
            traceFailure(e);
        }
    }// end testGetAttributeValue

    // **************** For tests only ****************
    /** Prints the result of {@link #getAttributesValues(String,String[])} applied to a file. */
    private static void testGetAttributesValues(String xmlFile, String tagName, String attrNames){
        try{
            DomDoc domDoc = loadForTest(xmlFile);
            // build the array of names from the parameter
            String[] attrNames2 = tig.Strings.stringToStringArray(attrNames);
            traceValues(domDoc.getAttributesValues(tagName, attrNames2));
        }
        catch(Exception e){
            traceFailure(e);
        }
    }// end testGetAttributesValues

    // **************** For tests only ****************
    /** Prints the result of {@link #getAttributesValues(String,String)} applied to a file. */
    private static void testGetAttributesValues2(String xmlFile, String tagName, String attrNames){
        try{
            DomDoc domDoc = loadForTest(xmlFile);
            traceValues(domDoc.getAttributesValues(tagName, attrNames));
        }
        catch(Exception e){
            traceFailure(e);
        }
    }// end testGetAttributesValues2

    // **************** For tests only ****************
    /** Parses an xml file and wraps the resulting document. */
    private static DomDoc loadForTest(String xmlFile) throws Exception{
        javax.xml.parsers.DocumentBuilderFactory factory = javax.xml.parsers.DocumentBuilderFactory.newInstance();
        javax.xml.parsers.DocumentBuilder builder = factory.newDocumentBuilder();
        return new DomDoc(builder.parse( new java.io.File(xmlFile) ));
    }// end loadForTest

    // **************** For tests only ****************
    /** Prints each cell of a result of <CODE>getAttributesValues()</CODE> ; copes with a null result. */
    private static void traceValues(String[][] res){
        if(res == null){
            System.out.println("res = null");
            return;
        }
        for(int i = 0; i < res.length; i++){
            for(int j = 0; j < res[i].length; j++){
                // a null cell is printed as "null" by the concatenation
                System.out.println("res[" + i + "][" + j + "] = " + res[i][j]);
            }
        }
    }// end traceValues

    // **************** For tests only ****************
    /** Prints the stack trace of a failure, preceded by its location for parse errors. */
    private static void traceFailure(Exception e){
        if(e instanceof org.xml.sax.SAXParseException){
            org.xml.sax.SAXParseException spe = (org.xml.sax.SAXParseException)e;
            System.out.println("Parse error occured - line " + spe.getLineNumber() + " col " + spe.getColumnNumber() + " :");
        }
        e.printStackTrace();
    }// end traceFailure

}// end class DomDoc