feat(dup-cleaner): initial commit

Begerad, Stefan 2021-10-05 09:54:45 -04:00
parent 8cbe941e5b
commit 5229bcc47d
7 changed files with 255 additions and 0 deletions

duplicate-cleaner/.gitignore

@@ -0,0 +1,5 @@
.idea
*~
log.txt
output.txt
target

duplicate-cleaner/pom.xml

@@ -0,0 +1,91 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<name>dup-rm</name>
<description>remove duplicate lines from file</description>
<url>https://begerad.de</url>
<groupId>de.begerad.duprm</groupId>
<artifactId>dup-rm</artifactId>
<version>0.0.1</version>
<packaging>jar</packaging>
<properties>
<!-- https://maven.apache.org/general.html#encoding-warning -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- note that the Maven Shade Plugin requires Java 7 or later -->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<!-- junit 5, unit test -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>5.7.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.14.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<!--The default maven-surefire-plugin is outdated;
make sure to update to a recent version.-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<!-- bind the shade goal to the package phase -->
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.MF</exclude>
</excludes>
</filter>
</filters>
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedClassifierName>depds</shadedClassifierName>
<transformers>
<!-- the ManifestResourceTransformer lets us set the Main-Class entry in the jar's META-INF/MANIFEST.MF, so the shaded jar can be run standalone after the build -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<!-- additional entries written to META-INF/MANIFEST.MF -->
<manifestEntries>
<Main-Class>de.begerad.duprm.Main</Main-Class>
<Specification-Title>${project.artifactId}</Specification-Title>
<Specification-Version>${project.version}</Specification-Version>
<Implementation-Title>${project.artifactId}</Implementation-Title>
<Implementation-Version>${project.version}</Implementation-Version>
<Implementation-Vendor-Id>${project.groupId}</Implementation-Vendor-Id>
</manifestEntries>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
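
Note on usage: with shadedArtifactAttached set to true and the classifier depds, running mvn package should produce, next to the plain jar, a self-contained target/dup-rm-0.0.1-depds.jar whose manifest Main-Class is de.begerad.duprm.Main, so it can be started directly with java -jar; the exact file name assumes the artifactId and version declared above.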

duplicate-cleaner/src/main/java/de/begerad/duprm/DuplicateCleaner.java

@@ -0,0 +1,70 @@
package de.begerad.duprm;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;

public class DuplicateCleaner {

    public final static Logger LOG = LoggerFactory.getLogger(DuplicateCleaner.class);

    /**
     * Copies the file at pathInput to pathOutput, keeping only the first
     * occurrence of each line.
     */
    public static void clean(final String pathInput, final String pathOutput) {
        LOG.debug("clean() started.");
        //remembers every line seen so far; Set.add() returns false for duplicates
        Set<String> set = new HashSet<>();
        //try-with-resources closes both the Scanner and the FileWriter in all cases
        try (Scanner sc = new Scanner(new File(pathInput));
             FileWriter writer = new FileWriter(pathOutput)) {
            while (sc.hasNextLine()) {
                String line = sc.nextLine();
                //write the line only on its first occurrence
                if (set.add(line)) {
                    writer.append(line).append(System.getProperty("line.separator"));
                }
            }
            writer.flush();
        } catch (IOException e) {
            LOG.error("clean() ERROR while processing " + pathInput + " or " + pathOutput);
            LOG.error("clean() stack trace: " + e);
            return;
        }
        LOG.debug("clean() done.");
    }
}
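
The deduplication relies on Set.add() returning false for an element that is already present: each line is written only on its first occurrence, so later duplicates are dropped while the relative order of the surviving lines is preserved.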

duplicate-cleaner/src/main/java/de/begerad/duprm/Main.java

@@ -0,0 +1,47 @@
package de.begerad.duprm;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import static de.begerad.duprm.DuplicateCleaner.clean;
public class Main {
public static String pathInput = "";
public static String pathOutput = "";
public final static Logger LOG = LoggerFactory.getLogger(Main.class);
public static void main(String[] args) {
LOG.debug("main() started...");
//check user input for input path
if (args.length < 1) {
System.out.println("Please enter input path as first parameter.");
return;
}
pathInput = args[0];
if (!(new File(pathInput).isFile())) {
System.out.println("parameter: "
+ pathInput
+ " is NOT a valid path.");
return;
}
//check user input for output path
if (args.length < 2) {
System.out.println("Please enter output path as second parameter.");
return;
}
pathOutput = args[1];
//call duplicate cleaner
clean(pathInput, pathOutput);
LOG.debug("main() done.");
}
}

duplicate-cleaner/src/main/resources/log4j2.xml

@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="INFO">
<Appenders>
<RollingFile name="rollingFile"
fileName="log.txt"
filePattern="log-%d{yyyy-MM-dd}.txt"
ignoreExceptions="false"
>
<PatternLayout>
<Pattern>[%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} %c{1} %m%n</Pattern>
</PatternLayout>
<Policies>
<SizeBasedTriggeringPolicy size="3MB" />
</Policies>
<!--keep at most 5 rolled-over files within the same rollover period when a size-based rollover is triggered-->
<DefaultRolloverStrategy max="5" />
</RollingFile>
<Console name="console" target="SYSTEM_OUT">
<PatternLayout pattern="[%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} %c{1} - %msg%n" />
</Console>
</Appenders>
<Loggers>
<Logger name="de.begerad" level="debug" additivity="true">
<appender-ref ref="rollingFile" level="debug" />
</Logger>
<Root level="debug" additivity="false">
<appender-ref ref="console" />
</Root>
</Loggers>
</Configuration>

duplicate-cleaner/src/test/java/de/begerad/duprm/TestDuplicateCleaner.java

@@ -0,0 +1,4 @@
package de.begerad.duprm;
public class TestDuplicateCleaner {
}


@@ -0,0 +1,6 @@
Hello how are you
Hello how are you
welcome to Tutorialspoint
Hello how are you
Hello how are you
welcome to Tutorialspoint
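
TestDuplicateCleaner is committed empty above. A minimal sketch of a first test, assuming JUnit 5's @TempDir (available transitively through the junit-jupiter-engine dependency in the pom) and using placeholder file names that are not part of this commit, could look like the following:

package de.begerad.duprm;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;

public class TestDuplicateCleaner {

    //JUnit-managed temporary directory; cleaned up after each test
    @TempDir
    Path tempDir;

    @Test
    void cleanKeepsOnlyFirstOccurrenceOfEachLine() throws Exception {
        //placeholder file names inside the temporary directory
        Path input = tempDir.resolve("input.txt");
        Path output = tempDir.resolve("output.txt");
        Files.write(input, Arrays.asList(
                "Hello how are you",
                "Hello how are you",
                "welcome to Tutorialspoint"));

        DuplicateCleaner.clean(input.toString(), output.toString());

        //only the first occurrence of each line should remain, in order
        List<String> result = Files.readAllLines(output);
        assertEquals(Arrays.asList("Hello how are you", "welcome to Tutorialspoint"), result);
    }
}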