Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
StringLengthFunction |
|
| 4.666666666666667;4.667 |
1 | /* |
|
2 | * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/StringLengthFunction.java,v 1.10 2005/06/28 13:44:45 elharo Exp $ |
|
3 | * $Revision: 1.10 $ |
|
4 | * $Date: 2005/06/28 13:44:45 $ |
|
5 | * |
|
6 | * ==================================================================== |
|
7 | * |
|
8 | * Copyright (C) 2000-2002 bob mcwhirter & James Strachan. |
|
9 | * All rights reserved. |
|
10 | * |
|
11 | * Redistribution and use in source and binary forms, with or without |
|
12 | * modification, are permitted provided that the following conditions |
|
13 | * are met: |
|
14 | * |
|
15 | * 1. Redistributions of source code must retain the above copyright |
|
16 | * notice, this list of conditions, and the following disclaimer. |
|
17 | * |
|
18 | * 2. Redistributions in binary form must reproduce the above copyright |
|
19 | * notice, this list of conditions, and the disclaimer that follows |
|
20 | * these conditions in the documentation and/or other materials |
|
21 | * provided with the distribution. |
|
22 | * |
|
23 | * 3. The name "Jaxen" must not be used to endorse or promote products |
|
24 | * derived from this software without prior written permission. For |
|
25 | * written permission, please contact license@jaxen.org. |
|
26 | * |
|
27 | * 4. Products derived from this software may not be called "Jaxen", nor |
|
28 | * may "Jaxen" appear in their name, without prior written permission |
|
29 | * from the Jaxen Project Management (pm@jaxen.org). |
|
30 | * |
|
31 | * In addition, we request (but do not require) that you include in the |
|
32 | * end-user documentation provided with the redistribution and/or in the |
|
33 | * software itself an acknowledgement equivalent to the following: |
|
34 | * "This product includes software developed by the |
|
35 | * Jaxen Project <http://www.jaxen.org/>." |
|
36 | * Alternatively, the acknowledgment may be graphical using the logos |
|
37 | * available at http://www.jaxen.org/ |
|
38 | * |
|
39 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
40 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
41 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
42 | * DISCLAIMED. IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT |
|
43 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
44 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
45 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
46 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
47 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
48 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
49 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
50 | * SUCH DAMAGE. |
|
51 | * |
|
52 | * ==================================================================== |
|
53 | * This software consists of voluntary contributions made by many |
|
54 | * individuals on behalf of the Jaxen Project and was originally |
|
55 | * created by bob mcwhirter <bob@werken.com> and |
|
56 | * James Strachan <jstrachan@apache.org>. For more information on the |
|
57 | * Jaxen Project, please see <http://www.jaxen.org/>. |
|
58 | * |
|
59 | * $Id: StringLengthFunction.java,v 1.10 2005/06/28 13:44:45 elharo Exp $ |
|
60 | */ |
|
61 | ||
62 | ||
63 | package org.jaxen.function; |
|
64 | ||
65 | import java.util.List; |
|
66 | ||
67 | import org.jaxen.Context; |
|
68 | import org.jaxen.Function; |
|
69 | import org.jaxen.FunctionCallException; |
|
70 | import org.jaxen.Navigator; |
|
71 | ||
72 | /** |
|
73 | * <p><b>4.2</b> <code><i>number</i> string-length(<i>string</i>)</code></p> |
|
74 | * |
|
75 | * <p> |
|
76 | * The <b>string-length</b> function returns the number of <strong>Unicode characters</strong> |
|
77 | * in its argument. This is <strong>not</strong> necessarily |
|
78 | * the same as the number of <strong>Java chars</strong> |
|
79 | * in the corresponding Java string. In particular, if the Java <code>String</code> |
|
80 | * contains surrogate pairs each such pair will be counted as only one character |
|
81 | * by this function. If the argument is omitted, |
|
82 | * it returns the length of the string-value of the context node. |
|
83 | * </p> |
|
84 | * |
|
85 | * @author bob mcwhirter (bob @ werken.com) |
|
86 | * @see <a href="http://www.w3.org/TR/xpath#function-string-length" target="_top">Section |
|
87 | * 4.2 of the XPath Specification</a> |
|
88 | */ |
|
89 | public class StringLengthFunction implements Function |
|
90 | { |
|
91 | ||
92 | ||
93 | /** |
|
94 | * Create a new <code>StringLengthFunction</code> object. |
|
95 | */ |
|
96 | 294 | public StringLengthFunction() {} |
97 | ||
98 | ||
99 | /** |
|
100 | * <p> |
|
101 | * Returns the number of Unicode characters in the string-value of the argument. |
|
102 | * </p> |
|
103 | * |
|
104 | * @param context the context at the point in the |
|
105 | * expression when the function is called |
|
106 | * @param args a list containing the item whose string-value is to be counted. |
|
107 | * If empty, the length of the context node's string-value is returned. |
|
108 | * |
|
109 | * @return a <code>Double</code> giving the number of Unicode characters |
|
110 | * |
|
111 | * @throws FunctionCallException if args has more than one item |
|
112 | */ |
|
113 | public Object call(Context context, |
|
114 | List args) throws FunctionCallException |
|
115 | { |
|
116 | 42 | if (args.size() == 0) |
117 | { |
|
118 | 6 | return evaluate( context.getNodeSet(), |
119 | context.getNavigator() ); |
|
120 | } |
|
121 | 36 | else if (args.size() == 1) |
122 | { |
|
123 | 30 | return evaluate( args.get(0), |
124 | context.getNavigator() ); |
|
125 | } |
|
126 | ||
127 | 6 | throw new FunctionCallException( "string-length() requires one argument." ); |
128 | } |
|
129 | ||
130 | /** |
|
131 | * <p> |
|
132 | * Returns the number of Unicode characters in the string-value of |
|
133 | * an object. |
|
134 | * </p> |
|
135 | * |
|
136 | * @param obj the object whose string-value is counted |
|
137 | * @param nav used to calculate the string-values of the first two arguments |
|
138 | * |
|
139 | * @return a <code>Double</code> giving the number of Unicode characters |
|
140 | * |
|
141 | * @throws FunctionCallException if the string contains mismatched surrogates |
|
142 | */ |
|
143 | public static Double evaluate(Object obj, Navigator nav) throws FunctionCallException |
|
144 | { |
|
145 | ||
146 | // could/should I push the mismnatching checks into StringFunction.evaluate()???? |
|
147 | 336 | String str = StringFunction.evaluate( obj, nav ); |
148 | // String.length() counts UTF-16 code points; not Unicode characters |
|
149 | 336 | char[] data = str.toCharArray(); |
150 | 336 | int length = 0; |
151 | 1818 | for (int i = 0; i < data.length; i++) { |
152 | 1488 | char c = data[i]; |
153 | 1488 | length++; |
154 | // if this is a high surrogate; assume the next character is |
|
155 | // is a low surrogate and skip it |
|
156 | 1488 | if (c >= 0xD800) { |
157 | try { |
|
158 | 30 | char low = data[i+1]; |
159 | 30 | if (low < 0xDC00 || low > 0xDFFF) { |
160 | 6 | throw new FunctionCallException("Bad surrogate pair in string " + str); |
161 | } |
|
162 | 24 | i++; // increment past low surrogate |
163 | } |
|
164 | 0 | catch (ArrayIndexOutOfBoundsException ex) { |
165 | 0 | throw new FunctionCallException("Bad surrogate pair in string " + str); |
166 | 24 | } |
167 | } |
|
168 | } |
|
169 | 330 | return new Double(length); |
170 | } |
|
171 | ||
172 | } |