feat(dup-cleaner): initial commit
This commit is contained in:
parent
8cbe941e5b
commit
5229bcc47d
|
@ -0,0 +1,5 @@
|
||||||
|
.idea
|
||||||
|
*~
|
||||||
|
log.txt
|
||||||
|
output.txt
|
||||||
|
target
|
|
@ -0,0 +1,91 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for dup-rm: compiles for Java 8 and additionally
     produces a runnable, self-contained ("shaded") jar with classifier "depds". -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <name>dup-rm</name>
    <description>remove duplicate lines from file</description>
    <url>https://begerad.de</url>
    <groupId>de.begerad.duprm</groupId>
    <artifactId>dup-rm</artifactId>
    <version>0.0.1</version>
    <packaging>jar</packaging>

    <properties>
        <!-- https://maven.apache.org/general.html#encoding-warning -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- note that the Maven Shade Plugin only works with Java 7 and up -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- JUnit 5, unit tests only -->
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-engine</artifactId>
            <version>5.7.2</version>
            <scope>test</scope>
        </dependency>

        <!-- SLF4J binding backed by Log4j 2.
             Versions before 2.15.0 are affected by CVE-2021-44228 ("Log4Shell");
             2.17.1 contains the follow-up fixes as well and still runs on Java 8. -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>2.17.1</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- The default maven-surefire-plugin is outdated and does not run
                 JUnit 5 tests; make sure to pin a recent version. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.22.2</version>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version>
                <executions>
                    <execution>
                        <!-- run the shade goal as part of the package phase -->
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <!-- drop the manifests of all bundled artifacts;
                                         the transformer below writes our own -->
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.MF</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <!-- keep the plain jar as main artifact and attach the
                                 shaded jar next to it under the "depds" classifier -->
                            <shadedArtifactAttached>true</shadedArtifactAttached>
                            <shadedClassifierName>depds</shadedClassifierName>
                            <transformers>
                                <!-- ManifestResourceTransformer lets us define a Main-Class in the
                                     META-INF/MANIFEST.MF of the shaded jar so that it can be started
                                     standalone with "java -jar" -->
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!-- additional entries written to META-INF/MANIFEST.MF -->
                                    <manifestEntries>
                                        <Main-Class>de.begerad.duprm.Main</Main-Class>
                                        <Specification-Title>${project.artifactId}</Specification-Title>
                                        <Specification-Version>${project.version}</Specification-Version>
                                        <Implementation-Title>${project.artifactId}</Implementation-Title>
                                        <Implementation-Version>${project.version}</Implementation-Version>
                                        <Implementation-Vendor-Id>${project.groupId}</Implementation-Vendor-Id>
                                    </manifestEntries>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
|
|
@ -0,0 +1,70 @@
|
||||||
|
package de.begerad.duprm;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Scanner;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class DuplicateCleaner {
|
||||||
|
|
||||||
|
public final static Logger LOG = LoggerFactory.getLogger(DuplicateCleaner.class);
|
||||||
|
|
||||||
|
public static void clean(final String pathInput, final String pathOutput) {
|
||||||
|
LOG.debug("clean() started.");
|
||||||
|
|
||||||
|
String line;
|
||||||
|
|
||||||
|
//instantiating the Scanner class
|
||||||
|
Scanner sc;
|
||||||
|
try {
|
||||||
|
sc = new Scanner(new File(pathInput));
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
LOG.error("clean() ERROR while accessing " + pathInput + " file");
|
||||||
|
LOG.error("clean() stack trace: " + e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//instantiating the FileWriter class
|
||||||
|
FileWriter writer;
|
||||||
|
try {
|
||||||
|
writer = new FileWriter(pathOutput);
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("clean() ERROR while accessing " + pathOutput + " file");
|
||||||
|
LOG.error("clean() stack trace: " + e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//instantiating the Set class
|
||||||
|
Set<String> set = new HashSet<>();
|
||||||
|
|
||||||
|
while (sc.hasNextLine()) {
|
||||||
|
line = sc.nextLine();
|
||||||
|
if (set.add(line)) {
|
||||||
|
try {
|
||||||
|
writer.append(line).append(System.getProperty("line.separator"));
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("clean() ERROR while appending data");
|
||||||
|
LOG.error("clean() stack trace: " + e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
writer.flush();
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("clean() ERROR while flushing data");
|
||||||
|
LOG.error(e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.debug("clean() done.");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,47 @@
|
||||||
|
package de.begerad.duprm;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import static de.begerad.duprm.DuplicateCleaner.clean;
|
||||||
|
|
||||||
|
public class Main {
|
||||||
|
|
||||||
|
public static String pathInput = "";
|
||||||
|
|
||||||
|
public static String pathOutput = "";
|
||||||
|
|
||||||
|
public final static Logger LOG = LoggerFactory.getLogger(Main.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
LOG.debug("main() started...");
|
||||||
|
|
||||||
|
//check user input for input path
|
||||||
|
if (args.length < 1) {
|
||||||
|
System.out.println("Please enter input path as first parameter.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pathInput = args[0];
|
||||||
|
if (!(new File(pathInput).isFile())) {
|
||||||
|
System.out.println("parameter: "
|
||||||
|
+ pathInput
|
||||||
|
+ " is NOT a valid path.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//check user input for output path
|
||||||
|
if (args.length < 2) {
|
||||||
|
System.out.println("Please enter output path as second parameter.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pathOutput = args[1];
|
||||||
|
//call duplicate cleaner
|
||||||
|
clean(pathInput, pathOutput);
|
||||||
|
|
||||||
|
LOG.debug("main() done.");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j 2 configuration: everything goes to the console; loggers below
     "de.begerad" are additionally written to a rolling log file. -->
<Configuration status="INFO">
    <Appenders>
        <!-- The %i counter in filePattern is required: the date pattern only has
             day granularity, so without %i every size-based rollover on the same
             day would overwrite the previous archive and "max" would be ignored. -->
        <RollingFile name="rollingFile"
                     fileName="log.txt"
                     filePattern="log-%d{yyyy-MM-dd}-%i.txt"
                     ignoreExceptions="false"
        >
            <PatternLayout>
                <Pattern>[%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} %c{1} %m%n</Pattern>
            </PatternLayout>
            <Policies>
                <!-- roll over as soon as log.txt reaches 3 MB -->
                <SizeBasedTriggeringPolicy size="3MB" />
            </Policies>
            <!-- ensure that within the same rollover period no more than 5 files are
                 kept when size-based rollovers are triggered -->
            <DefaultRolloverStrategy max="5" />
        </RollingFile>

        <Console name="console" target="SYSTEM_OUT">
            <PatternLayout pattern="[%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} %c{1} - %msg%n" />
        </Console>
    </Appenders>
    <Loggers>
        <!-- additivity="true": events are written to the file AND passed on to
             the root logger, i.e. they also show up on the console -->
        <Logger name="de.begerad" level="debug" additivity="true">
            <appender-ref ref="rollingFile" level="debug" />
        </Logger>
        <Root level="debug" additivity="false">
            <appender-ref ref="console" />
        </Root>
    </Loggers>

</Configuration>
|
|
@ -0,0 +1,4 @@
|
||||||
|
package de.begerad.duprm;
|
||||||
|
|
||||||
|
/**
 * Placeholder for the unit tests of {@code DuplicateCleaner}; it does not
 * contain any test cases yet.
 */
public class TestDuplicateCleaner {
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
Hello how are you
|
||||||
|
Hello how are you
|
||||||
|
welcome to Tutorialspoint
|
||||||
|
Hello how are you
|
||||||
|
Hello how are you
|
||||||
|
welcome to Tutorialspoint
|
Loading…
Reference in New Issue