r2072 - in trunk/nuiton-utils/src: main/java/org/nuiton/util test/java/org/nuiton/util
Author: echatellier
Date: 2011-02-03 15:10:58 +0100 (Thu, 03 Feb 2011)
New Revision: 2072
Url: http://nuiton.org/repositories/revision/nuiton-utils/2072

Log:
Unaccent method remove whitespaces

Modified:
   trunk/nuiton-utils/src/main/java/org/nuiton/util/StringUtil.java
   trunk/nuiton-utils/src/test/java/org/nuiton/util/StringUtilTest.java

Modified: trunk/nuiton-utils/src/main/java/org/nuiton/util/StringUtil.java
===================================================================
--- trunk/nuiton-utils/src/main/java/org/nuiton/util/StringUtil.java 2011-01-31 13:32:19 UTC (rev 2071)
+++ trunk/nuiton-utils/src/main/java/org/nuiton/util/StringUtil.java 2011-02-03 14:10:58 UTC (rev 2072)
@@ -42,6 +42,7 @@
 import java.security.NoSuchAlgorithmException;
 import java.text.DateFormat;
 import java.text.MessageFormat;
+import java.text.Normalizer;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -73,42 +74,9 @@
      * @return la chaine sans accent
      */
     static public String unaccent(String s) {
-        String result = "";
-        for (char c : s.toCharArray()) {
-            if ("éèêë".indexOf(c) != -1) {
-                result += "e";
-            } else if ("àäâ".indexOf(c) != -1) {
-                result += "a";
-            } else if ("ç".indexOf(c) != -1) {
-                result += "c";
-            } else if ("îï".indexOf(c) != -1) {
-                result += "i";
-            } else if ("ôö".indexOf(c) != -1) {
-                result += "o";
-            } else if ("ùûü".indexOf(c) != -1) {
-                result += "u";
-            } else if ("ÉÈÊË".indexOf(c) != -1) {
-                result += "E";
-            } else if ("ÀÂÄ".indexOf(c) != -1) {
-                result += "A";
-            } else if ("Ç".indexOf(c) != -1) {
-                result += "C";
-            } else if ("ÎÏ".indexOf(c) != -1) {
-                result += "I";
-            } else if ("ÔÖ".indexOf(c) != -1) {
-                result += "O";
-            } else if ("ÙÛÜ".indexOf(c) != -1) {
-                result += "U";
-            } else if ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
-                    || '0' <= c && c <= '9' || '.' == c || '-' == c
-                    || '_' == c) {
-                result += c;
-            } else {
-                // on ne l'ajoute pas a result donc on supprime le caractere
-                // result += encodeUTF(c);
-            }
-        }
-        return result;
+        // this is java 1.6 code
+        String temp = Normalizer.normalize(s, Normalizer.Form.NFD);
+        return temp.replaceAll("[^\\p{ASCII}]","");
     }
 
     /**

Modified: trunk/nuiton-utils/src/test/java/org/nuiton/util/StringUtilTest.java
===================================================================
--- trunk/nuiton-utils/src/test/java/org/nuiton/util/StringUtilTest.java 2011-01-31 13:32:19 UTC (rev 2071)
+++ trunk/nuiton-utils/src/test/java/org/nuiton/util/StringUtilTest.java 2011-02-03 14:10:58 UTC (rev 2072)
@@ -172,14 +172,17 @@
     }
 
     /**
-     * FIXME EC20100928 : this test and method is buggy (should not remove spaces).
+     * Since 2.1, this method doesn't not remove whitespaces.
      *
      * @throws Exception
      */
     @Test
     public void testUnaccent() throws Exception {
-        assertEquals("-aaaeeeeiioouuuc0123456789AAAEEEEIIOOUUUC._",
+        assertEquals("-aaaeeeeiioouuuc 0123456789 AAAEEEEIIOOUUUC._",
                 StringUtil.unaccent("-àâäéèêëîïôöùûüç 0123456789 ÀÂÄÉÈÊËÎÏÔÖÙÛÜÇ._"));
+
+        assertEquals("test en ete",
+                StringUtil.unaccent("test en été"));
     }
 
     @Test
participants (1)
-
echatellier@users.nuiton.org