Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
TranslateFunction |
|
| 3.8333333333333335;3.833 |
1 | /* |
|
2 | * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/TranslateFunction.java,v 1.9 2005/06/28 13:44:45 elharo Exp $ |
|
3 | * $Revision: 1.9 $ |
|
4 | * $Date: 2005/06/28 13:44:45 $ |
|
5 | * |
|
6 | * ==================================================================== |
|
7 | * |
|
8 | * Copyright (C) 2000-2002 bob mcwhirter & James Strachan. |
|
9 | * All rights reserved. |
|
10 | * |
|
11 | * Redistribution and use in source and binary forms, with or without |
|
12 | * modification, are permitted provided that the following conditions |
|
13 | * are met: |
|
14 | * |
|
15 | * 1. Redistributions of source code must retain the above copyright |
|
16 | * notice, this list of conditions, and the following disclaimer. |
|
17 | * |
|
18 | * 2. Redistributions in binary form must reproduce the above copyright |
|
19 | * notice, this list of conditions, and the disclaimer that follows |
|
20 | * these conditions in the documentation and/or other materials |
|
21 | * provided with the distribution. |
|
22 | * |
|
23 | * 3. The name "Jaxen" must not be used to endorse or promote products |
|
24 | * derived from this software without prior written permission. For |
|
25 | * written permission, please contact license@jaxen.org. |
|
26 | * |
|
27 | * 4. Products derived from this software may not be called "Jaxen", nor |
|
28 | * may "Jaxen" appear in their name, without prior written permission |
|
29 | * from the Jaxen Project Management (pm@jaxen.org). |
|
30 | * |
|
31 | * In addition, we request (but do not require) that you include in the |
|
32 | * end-user documentation provided with the redistribution and/or in the |
|
33 | * software itself an acknowledgement equivalent to the following: |
|
34 | * "This product includes software developed by the |
|
35 | * Jaxen Project <http://www.jaxen.org/>." |
|
36 | * Alternatively, the acknowledgment may be graphical using the logos |
|
37 | * available at http://www.jaxen.org/ |
|
38 | * |
|
39 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
40 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
41 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
42 | * DISCLAIMED. IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT |
|
43 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
44 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
45 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
46 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
47 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
48 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
49 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
50 | * SUCH DAMAGE. |
|
51 | * |
|
52 | * ==================================================================== |
|
53 | * This software consists of voluntary contributions made by many |
|
54 | * individuals on behalf of the Jaxen Project and was originally |
|
55 | * created by bob mcwhirter <bob@werken.com> and |
|
56 | * James Strachan <jstrachan@apache.org>. For more information on the |
|
57 | * Jaxen Project, please see <http://www.jaxen.org/>. |
|
58 | * |
|
59 | * $Id: TranslateFunction.java,v 1.9 2005/06/28 13:44:45 elharo Exp $ |
|
60 | */ |
|
61 | ||
62 | ||
63 | package org.jaxen.function; |
|
64 | ||
65 | import java.util.HashMap; |
|
66 | import java.util.List; |
|
67 | import java.util.Map; |
|
68 | ||
69 | import org.jaxen.Context; |
|
70 | import org.jaxen.Function; |
|
71 | import org.jaxen.FunctionCallException; |
|
72 | import org.jaxen.Navigator; |
|
73 | ||
74 | /** |
|
75 | * <p> |
|
76 | * <b>4.2</b> |
|
77 | * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code> |
|
78 | * </p> |
|
79 | * |
|
80 | * <blockquote src="http://www.w3.org/TR/xpath#function-translate"> |
|
81 | * <p> |
|
82 | * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function |
|
83 | * returns the first argument string with occurrences of characters in |
|
84 | * the second argument string replaced by the character at the |
|
85 | * corresponding position in the third argument string. For example, |
|
86 | * <code>translate("bar","abc","ABC")</code> returns the string |
|
87 | * <code>BAr</code>. If there is a character in the second argument |
|
88 | * string with no character at a corresponding position in the third |
|
89 | * argument string (because the second argument string is longer than |
|
90 | * the third argument string), then occurrences of that character in the |
|
91 | * first argument string are removed. For example, |
|
92 | * <code>translate("--aaa--","abc-","ABC")</code> returns |
|
93 | * <code>"AAA"</code>. If a character occurs more than once in the |
|
94 | * second argument string, then the first occurrence determines the |
|
95 | * replacement character. If the third argument string is longer than |
|
96 | * the second argument string, then excess characters are ignored. |
|
97 | * </p> |
|
98 | * |
|
99 | * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a |
|
100 | * sufficient solution for case conversion in all languages. A future |
|
101 | * version of XPath may provide additional functions for case |
|
102 | * conversion.</blockquote> |
|
103 | * |
|
104 | * </blockquote> |
|
105 | * |
|
106 | * @author Jan Dvorak ( jan.dvorak @ mathan.cz ) |
|
107 | * |
|
108 | * @see <a href="http://www.w3.org/TR/xpath#function-translate" |
|
109 | * target="_top">Section 4.2 of the XPath Specification</a> |
|
110 | */ |
|
111 | public class TranslateFunction implements Function |
|
112 | { |
|
113 | ||
114 | /* The translation is done thru a HashMap. Performance tip (for anyone |
|
115 | * who needs to improve the performance of this particular function): |
|
116 | * Cache the HashMaps, once they are constructed. */ |
|
117 | ||
118 | /** |
|
119 | * Create a new <code>TranslateFunction</code> object. |
|
120 | */ |
|
121 | 294 | public TranslateFunction() {} |
122 | ||
123 | ||
124 | /** Returns a copy of the first argument in which |
|
125 | * characters found in the second argument are replaced by |
|
126 | * corresponding characters from the third argument. |
|
127 | * |
|
128 | * @param context the context at the point in the |
|
129 | * expression when the function is called |
|
130 | * @param args a list that contains exactly three items |
|
131 | * |
|
132 | * @return a <code>String</code> built from <code>args.get(0)</code> |
|
133 | * in which occurrences of characters in <code>args.get(1)</code> |
|
134 | * are replaced by the corresponding characters in <code>args.get(2)</code> |
|
135 | * |
|
136 | * @throws FunctionCallException if <code>args</code> does not have exactly three items |
|
137 | */ |
|
138 | public Object call(Context context, |
|
139 | List args) throws FunctionCallException |
|
140 | { |
|
141 | 306 | if (args.size() == 3) { |
142 | 300 | return evaluate( args.get(0), |
143 | args.get(1), |
|
144 | args.get(2), |
|
145 | context.getNavigator() ); |
|
146 | } |
|
147 | ||
148 | 6 | throw new FunctionCallException( "translate() requires three arguments." ); |
149 | } |
|
150 | ||
151 | /** |
|
152 | * Returns a copy of <code>strArg</code> in which |
|
153 | * characters found in <code>fromArg</code> are replaced by |
|
154 | * corresponding characters from <code>toArg</code>. |
|
155 | * If necessary each argument is first converted to it string-value |
|
156 | * as if by the XPath <code>string()</code> function. |
|
157 | * |
|
158 | * @param strArg the base string |
|
159 | * @param fromArg the characters to be replaced |
|
160 | * @param toArg the characters they will be replaced by |
|
161 | * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments. |
|
162 | * |
|
163 | * @return a copy of <code>strArg</code> in which |
|
164 | * characters found in <code>fromArg</code> are replaced by |
|
165 | * corresponding characters from <code>toArg</code> |
|
166 | * |
|
167 | * @throws FunctionCallException if one of the arguments is a malformed Unicode string; |
|
168 | * that is, if surrogate characters don't line up properly |
|
169 | * |
|
170 | */ |
|
171 | public static String evaluate(Object strArg, |
|
172 | Object fromArg, |
|
173 | Object toArg, |
|
174 | Navigator nav) throws FunctionCallException |
|
175 | { |
|
176 | 300 | String inStr = StringFunction.evaluate( strArg, nav ); |
177 | 300 | String fromStr = StringFunction.evaluate( fromArg, nav ); |
178 | 300 | String toStr = StringFunction.evaluate( toArg, nav ); |
179 | ||
180 | // Initialize the mapping in a HashMap |
|
181 | 300 | Map characterMap = new HashMap(); |
182 | 300 | String[] fromCharacters = toUnicodeCharacters(fromStr); |
183 | 300 | String[] toCharacters = toUnicodeCharacters(toStr); |
184 | 288 | int fromLen = fromCharacters.length; |
185 | 288 | int toLen = toCharacters.length; |
186 | 1128 | for ( int i = 0; i < fromLen; i++ ) { |
187 | 840 | String cFrom = fromCharacters[i]; |
188 | 840 | if ( characterMap.containsKey( cFrom ) ) { |
189 | // We've seen the character before, ignore |
|
190 | 24 | continue; |
191 | } |
|
192 | ||
193 | 816 | if ( i < toLen ) { |
194 | // Will change |
|
195 | 672 | characterMap.put( cFrom, toCharacters[i] ); |
196 | } |
|
197 | else { |
|
198 | // Will delete |
|
199 | 144 | characterMap.put( cFrom, null ); |
200 | } |
|
201 | } |
|
202 | ||
203 | // Process the input string thru the map |
|
204 | 288 | StringBuffer outStr = new StringBuffer( inStr.length() ); |
205 | 288 | String[] inCharacters = toUnicodeCharacters(inStr); |
206 | 288 | int inLen = inCharacters.length; |
207 | 1314 | for ( int i = 0; i < inLen; i++ ) { |
208 | 1026 | String cIn = inCharacters[i]; |
209 | 1026 | if ( characterMap.containsKey( cIn ) ) { |
210 | 822 | String cTo = (String) characterMap.get( cIn ); |
211 | 822 | if ( cTo != null ) { |
212 | 678 | outStr.append( cTo ); |
213 | } |
|
214 | } |
|
215 | else { |
|
216 | 204 | outStr.append( cIn ); |
217 | } |
|
218 | } |
|
219 | ||
220 | 288 | return outStr.toString(); |
221 | } |
|
222 | ||
223 | private static String[] toUnicodeCharacters(String s) throws FunctionCallException { |
|
224 | ||
225 | 888 | String[] result = new String[s.length()]; |
226 | 888 | int stringLength = 0; |
227 | 3540 | for (int i = 0; i < s.length(); i++) { |
228 | 2664 | char c1 = s.charAt(i); |
229 | 2664 | if (isHighSurrogate(c1)) { |
230 | try { |
|
231 | 54 | char c2 = s.charAt(i+1); |
232 | 54 | if (isLowSurrogate(c2)) { |
233 | 42 | result[stringLength] = (c1 + "" + c2).intern(); |
234 | 42 | i++; |
235 | } |
|
236 | else { |
|
237 | 12 | throw new FunctionCallException("Mismatched surrogate pair in translate function"); |
238 | } |
|
239 | } |
|
240 | 0 | catch (StringIndexOutOfBoundsException ex) { |
241 | 0 | throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function"); |
242 | 42 | } |
243 | } |
|
244 | else { |
|
245 | 2610 | result[stringLength]=String.valueOf(c1).intern(); |
246 | } |
|
247 | 2652 | stringLength++; |
248 | } |
|
249 | ||
250 | 876 | if (stringLength == result.length) return result; |
251 | ||
252 | // trim array |
|
253 | 42 | String[] trimmed = new String[stringLength]; |
254 | 42 | System.arraycopy(result, 0, trimmed, 0, stringLength); |
255 | 42 | return trimmed; |
256 | ||
257 | } |
|
258 | ||
259 | private static boolean isHighSurrogate(char c) { |
|
260 | 2664 | return c >= 0xD800 && c <= 0xDBFF; |
261 | } |
|
262 | ||
263 | private static boolean isLowSurrogate(char c) { |
|
264 | 54 | return c >= 0xDC00 && c <= 0xDFFF; |
265 | } |
|
266 | ||
267 | } |