Author: chatellier Date: 2009-06-30 10:39:17 +0000 (Tue, 30 Jun 2009) New Revision: 2446 Modified: isis-fish/trunk/pom.xml isis-fish/trunk/src/main/java/fr/ifremer/isisfish/datastore/DataStorage.java isis-fish/trunk/src/main/java/fr/ifremer/isisfish/util/CompileHelper.java isis-fish/trunk/src/test/java/fr/ifremer/isisfish/util/CompileHelperTest.java Log: Add codepage converter (code and tests) Modified: isis-fish/trunk/pom.xml =================================================================== --- isis-fish/trunk/pom.xml 2009-06-30 08:29:44 UTC (rev 2445) +++ isis-fish/trunk/pom.xml 2009-06-30 10:39:17 UTC (rev 2446) @@ -318,6 +318,13 @@ </dependency> <dependency> + <groupId>net.sourceforge.cpdetector</groupId> + <artifactId>cpdetector</artifactId> + <version>1.0.7</version> + <scope>compile</scope> + </dependency> + + <dependency> <groupId>com.sun</groupId> <artifactId>tools</artifactId> <version>1.6.0</version> Modified: isis-fish/trunk/src/main/java/fr/ifremer/isisfish/datastore/DataStorage.java =================================================================== --- isis-fish/trunk/src/main/java/fr/ifremer/isisfish/datastore/DataStorage.java 2009-06-30 08:29:44 UTC (rev 2445) +++ isis-fish/trunk/src/main/java/fr/ifremer/isisfish/datastore/DataStorage.java 2009-06-30 10:39:17 UTC (rev 2446) @@ -348,6 +348,7 @@ * Cree un zip. Le zip contient les memes fichiers lorsqu'on utilise le CVS. * * @param file le fichier dans lequel l'export doit se faire + * @param createMD5 if <tt>true</tt> create zip md5 * @return l'argument file * @throws IOException si problème lors de la création du zip */ Modified: isis-fish/trunk/src/main/java/fr/ifremer/isisfish/util/CompileHelper.java =================================================================== --- isis-fish/trunk/src/main/java/fr/ifremer/isisfish/util/CompileHelper.java 2009-06-30 08:29:44 UTC (rev 2445) +++ isis-fish/trunk/src/main/java/fr/ifremer/isisfish/util/CompileHelper.java 2009-06-30 10:39:17 UTC (rev 2446) @@ -20,12 +20,27 @@ package fr.ifremer.isisfish.util; import static org.codelutin.i18n.I18n._; +import info.monitorenter.cpdetector.io.ASCIIDetector; +import info.monitorenter.cpdetector.io.ByteOrderMarkDetector; +import info.monitorenter.cpdetector.io.CodepageDetectorProxy; +import info.monitorenter.cpdetector.io.JChardetFacade; +import info.monitorenter.cpdetector.io.ParsingDetector; import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.io.Reader; +import java.io.Writer; import java.lang.reflect.Method; +import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -36,6 +51,7 @@ import javax.tools.JavaFileObject; import javax.tools.StandardJavaFileManager; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -65,6 +81,9 @@ /** Logger for this class. */ private static final Log log = LogFactory.getLog(CompileHelper.class); + /** CP detector proxy. */ + protected static CodepageDetectorProxy detector; + /** * Recherche tous les fichiers qui un source plus recent que la version compilé. * @@ -197,7 +216,7 @@ protected static int compile(List<File> classpath, Collection<File> src, File dest, PrintWriter out) { dest.mkdirs(); - + int result = -1000; try { JavaCompiler compiler = JavacTool.create(); @@ -243,7 +262,9 @@ * <li>System.getProperty("java.class.path") * <li>All first jar dependency (META-INF/MANIFEST.MF) * </ul> - * @return classptah as string + * @param classpath initial classpath + * @return classpath as string + * @throws Exception */ public static String getClassPathAsString(List<File> classpath) throws Exception { @@ -351,4 +372,97 @@ String result = "<a href='" + ref + "'>" + text + "</a>"; return result; } + + protected static CodepageDetectorProxy getCodepageDetector() { + + if (detector == null) { + detector = CodepageDetectorProxy.getInstance(); // A singleton. + + // Add the implementations of info.monitorenter.cpdetector.io.ICodepageDetector: + // This one is quick if we deal with unicode codepages: + detector.add(new ByteOrderMarkDetector()); + // The first instance delegated to tries to detect the meta charset attribut in html pages. + detector.add(new ParsingDetector(true)); // be verbose about parsing. + // This one does the tricks of exclusion and frequency detection, if first implementation is + // unsuccessful: + detector.add(JChardetFacade.getInstance()); // Another singleton. + detector.add(ASCIIDetector.getInstance()); // Fallback, see javadoc. + } + return detector; + } + + + /** + * Convert all files to UTF-8. + * + * @param files fiels to convert + * @return converted file list + */ + public static List<File> convertToUnicode(List<File> files) { + + CodepageDetectorProxy myDetector = getCodepageDetector(); + + for (File file : files) { + try { + Charset charset = myDetector.detectCodepage(file.toURI().toURL()); + + if (log.isDebugEnabled()) { + log.debug("Charset for " + file.getAbsolutePath() + " is " + charset); + } + + if (charset != null && !charset.name().equalsIgnoreCase("utf-8")) { + + if (log.isDebugEnabled()) { + log.debug("Convert " + file.getAbsolutePath() + " to unicode"); + } + + File tmpFile = File.createTempFile(file.getName(), ".copy"); + tmpFile.deleteOnExit(); + + // direct copy + InputStream is = new FileInputStream(file); + OutputStream os = new FileOutputStream(tmpFile); + try { + IOUtils.copy(is, os); + } + finally { + is.close(); + os.close(); + } + + // copy using cp transaltion + is = new FileInputStream(tmpFile); + os = new FileOutputStream(file); + Reader ir = new InputStreamReader(is, charset); + Writer ow = new OutputStreamWriter(new FileOutputStream(file), "utf-8"); + try { + IOUtils.copy(ir, ow); + } + finally { + ir.close(); + ow.close(); + is.close(); + os.close(); + } + + } + else { + if (log.isDebugEnabled()) { + log.debug("File " + file.getAbsolutePath() + " already in unicode : skip"); + } + } + } catch (MalformedURLException e) { + if (log.isErrorEnabled()) { + log.error("Can't convert file in unicode", e); + } + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Can't convert file in unicode", e); + } + } + + } + + return files; + } } Modified: isis-fish/trunk/src/test/java/fr/ifremer/isisfish/util/CompileHelperTest.java =================================================================== --- isis-fish/trunk/src/test/java/fr/ifremer/isisfish/util/CompileHelperTest.java 2009-06-30 08:29:44 UTC (rev 2445) +++ isis-fish/trunk/src/test/java/fr/ifremer/isisfish/util/CompileHelperTest.java 2009-06-30 10:39:17 UTC (rev 2446) @@ -25,12 +25,16 @@ import java.util.Collections; import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.codelutin.util.FileUtil; import org.junit.Assert; import org.junit.Test; +import com.sun.source.tree.AssertTree; + import fr.ifremer.isisfish.AbstractIsisFishTest; /** @@ -172,4 +176,61 @@ fB.delete(); FileUtil.deleteRecursively(dest); } + + /** + * Save a file as non utf-8 encoded. + * Convert to uft-8. + * Assert that file has been converted. + * @throws IOException + */ + @Test + public void convertToUnicodeTest() throws IOException { + String content = getFirstClassContent("ConvertToUnicode"); + + // add un accent + content = content.replace("code", "cod\u00E9"); + + File nonUnicodeFile = File.createTempFile("ConvertToUnicode", ".java", getTestDirectory()); + FileUtil.writeString(nonUnicodeFile, content, "ISO-8859-15"); + + if (log.isDebugEnabled()) { + log.debug("Saved file " + nonUnicodeFile.getAbsolutePath() + " as ISO-8859-15"); + } + + CompileHelper.convertToUnicode(Collections.singletonList(nonUnicodeFile)); + + // don't support encoding String newContent = FileUtil.readAsString(nonUnicodeFile); + String newContent = FileUtils.readFileToString(nonUnicodeFile, "UTF-8"); + Assert.assertTrue(newContent.indexOf("cod\u00E9") > 0); + + nonUnicodeFile.delete(); + } + + /** + * Try to convert file already in UTF-8. + * + * @throws IOException + */ + @Test + public void convertToUnicodeUselessTest() throws IOException { + String content = getFirstClassContent("UselessConvertToUnicode"); + + // add un accent + content = content.replace("code", "cod\u00E9"); + + File nonUnicodeFile = File.createTempFile("UselessConvertToUnicode", ".java", getTestDirectory()); + FileUtil.writeString(nonUnicodeFile, content, "UTF-8"); + + if (log.isDebugEnabled()) { + log.debug("Saved file " + nonUnicodeFile.getAbsolutePath() + " as UTF-8"); + } + + CompileHelper.convertToUnicode(Collections.singletonList(nonUnicodeFile)); + + // don't support encoding String newContent = FileUtil.readAsString(nonUnicodeFile); + String newContent = FileUtils.readFileToString(nonUnicodeFile, "UTF-8"); + Assert.assertTrue(newContent.indexOf("cod\u00E9") > 0); + + nonUnicodeFile.delete(); + } }
participants (1)
-
chatellier@users.labs.libre-entreprise.org