From 5229bcc47d6cecd29b94b026d138539d071d4ed4 Mon Sep 17 00:00:00 2001 From: "Begerad, Stefan" Date: Tue, 5 Oct 2021 09:54:45 -0400 Subject: [PATCH] feat(dup-cleaner): initial commit --- duplicate-cleaner/.gitignore | 5 + duplicate-cleaner/pom.xml | 91 +++++++++++++++++++ .../de/begerad/duprm/DuplicateCleaner.java | 70 ++++++++++++++ .../src/main/java/de/begerad/duprm/Main.java | 47 ++++++++++ .../src/main/resources/log4j2.xml | 32 +++++++ .../begerad/duprm/TestDuplicateCleaner.java | 4 + .../src/test/resources/input.txt | 6 ++ 7 files changed, 255 insertions(+) create mode 100644 duplicate-cleaner/.gitignore create mode 100644 duplicate-cleaner/pom.xml create mode 100644 duplicate-cleaner/src/main/java/de/begerad/duprm/DuplicateCleaner.java create mode 100644 duplicate-cleaner/src/main/java/de/begerad/duprm/Main.java create mode 100644 duplicate-cleaner/src/main/resources/log4j2.xml create mode 100644 duplicate-cleaner/src/test/java/de/begerad/duprm/TestDuplicateCleaner.java create mode 100644 duplicate-cleaner/src/test/resources/input.txt diff --git a/duplicate-cleaner/.gitignore b/duplicate-cleaner/.gitignore new file mode 100644 index 0000000..d3954f9 --- /dev/null +++ b/duplicate-cleaner/.gitignore @@ -0,0 +1,5 @@ +.idea +*~ +log.txt +output.txt +target \ No newline at end of file diff --git a/duplicate-cleaner/pom.xml b/duplicate-cleaner/pom.xml new file mode 100644 index 0000000..324787e --- /dev/null +++ b/duplicate-cleaner/pom.xml @@ -0,0 +1,91 @@ + + + 4.0.0 + + dup-rm + remove duplicate lines from file + https://begerad.de + de.begerad.duprm + dup-rm + 0.0.1 + jar + + + + UTF-8 + + 1.8 + 1.8 + + + + + + org.junit.jupiter + junit-jupiter-engine + 5.7.2 + test + + + + org.apache.logging.log4j + log4j-slf4j-impl + 2.14.1 + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.4 + + + package + + + shade + + + + + *:* + + META-INF/*.MF + + + + true + depds + + + + + + 
de.begerad.duprm.Main + ${project.artifactId} + ${project.version} + ${project.artifactId} + ${project.version} + ${project.groupId} + + + + + + + + + + diff --git a/duplicate-cleaner/src/main/java/de/begerad/duprm/DuplicateCleaner.java b/duplicate-cleaner/src/main/java/de/begerad/duprm/DuplicateCleaner.java new file mode 100644 index 0000000..ad0ffa8 --- /dev/null +++ b/duplicate-cleaner/src/main/java/de/begerad/duprm/DuplicateCleaner.java @@ -0,0 +1,70 @@ +package de.begerad.duprm; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Scanner; +import java.util.Set; + +public class DuplicateCleaner { + + public final static Logger LOG = LoggerFactory.getLogger(DuplicateCleaner.class); + + public static void clean(final String pathInput, final String pathOutput) { + LOG.debug("clean() started."); + + String line; + + //instantiating the Scanner class + Scanner sc; + try { + sc = new Scanner(new File(pathInput)); + } catch (FileNotFoundException e) { + LOG.error("clean() ERROR while accessing " + pathInput + " file"); + LOG.error("clean() stack trace: " + e); + return; + } + + //instantiating the FileWriter class + FileWriter writer; + try { + writer = new FileWriter(pathOutput); + } catch (IOException e) { + LOG.error("clean() ERROR while accessing " + pathOutput + " file"); + LOG.error("clean() stack trace: " + e); + return; + } + + //instantiating the Set class + Set set = new HashSet<>(); + + while (sc.hasNextLine()) { + line = sc.nextLine(); + if (set.add(line)) { + try { + writer.append(line).append(System.getProperty("line.separator")); + } catch (IOException e) { + LOG.error("clean() ERROR while appending data"); + LOG.error("clean() stack trace: " + e); + return; + } + } + } + try { + writer.flush(); + } catch (IOException e) { + LOG.error("clean() ERROR while 
flushing data"); + LOG.error(e.getMessage()); + e.printStackTrace(); + return; + } + + LOG.debug("clean() done."); + } +} diff --git a/duplicate-cleaner/src/main/java/de/begerad/duprm/Main.java b/duplicate-cleaner/src/main/java/de/begerad/duprm/Main.java new file mode 100644 index 0000000..5cb84f4 --- /dev/null +++ b/duplicate-cleaner/src/main/java/de/begerad/duprm/Main.java @@ -0,0 +1,47 @@ +package de.begerad.duprm; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; + +import static de.begerad.duprm.DuplicateCleaner.clean; + +public class Main { + + public static String pathInput = ""; + + public static String pathOutput = ""; + + public final static Logger LOG = LoggerFactory.getLogger(Main.class); + + public static void main(String[] args) { + LOG.debug("main() started..."); + + //check user input for input path + if (args.length < 1) { + System.out.println("Please enter input path as first parameter."); + return; + } + + pathInput = args[0]; + if (!(new File(pathInput).isFile())) { + System.out.println("parameter: " + + pathInput + + " is NOT a valid path."); + return; + } + + //check user input for output path + if (args.length < 2) { + System.out.println("Please enter output path as second parameter."); + return; + } + + pathOutput = args[1]; + //call duplicate cleaner + clean(pathInput, pathOutput); + + LOG.debug("main() done."); + } +} diff --git a/duplicate-cleaner/src/main/resources/log4j2.xml b/duplicate-cleaner/src/main/resources/log4j2.xml new file mode 100644 index 0000000..ce0e7ae --- /dev/null +++ b/duplicate-cleaner/src/main/resources/log4j2.xml @@ -0,0 +1,32 @@ + + + + + + [%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} %c{1} %m%n + + + + + + + + + + + + + + + + + + + + + + diff --git a/duplicate-cleaner/src/test/java/de/begerad/duprm/TestDuplicateCleaner.java b/duplicate-cleaner/src/test/java/de/begerad/duprm/TestDuplicateCleaner.java new file mode 100644 index 0000000..6fa0860 --- /dev/null +++ 
b/duplicate-cleaner/src/test/java/de/begerad/duprm/TestDuplicateCleaner.java @@ -0,0 +1,4 @@ +package de.begerad.duprm; + +/* Placeholder test class: it declares no members, so no test cases exist yet. */ public class TestDuplicateCleaner { +} diff --git a/duplicate-cleaner/src/test/resources/input.txt b/duplicate-cleaner/src/test/resources/input.txt new file mode 100644 index 0000000..9eda46a --- /dev/null +++ b/duplicate-cleaner/src/test/resources/input.txt @@ -0,0 +1,6 @@ +Hello how are you +Hello how are you +welcome to Tutorialspoint +Hello how are you +Hello how are you +welcome to Tutorialspoint