feat(dup-cleaner): initial commit
This commit is contained in:
parent
8cbe941e5b
commit
5229bcc47d
|
@ -0,0 +1,5 @@
|
|||
.idea
|
||||
*~
|
||||
log.txt
|
||||
output.txt
|
||||
target
|
|
@ -0,0 +1,91 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<name>dup-rm</name>
|
||||
<description>remove duplicate lines from file</description>
|
||||
<url>https://begerad.de</url>
|
||||
<groupId>de.begerad.duprm</groupId>
|
||||
<artifactId>dup-rm</artifactId>
|
||||
<version>0.0.1</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<properties>
|
||||
<!-- https://maven.apache.org/general.html#encoding-warning -->
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<!-- note that Maven Shade Plugin only works with Java from version 7 and up -->
|
||||
<maven.compiler.source>1.8</maven.compiler.source>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- junit 5, unit test -->
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-engine</artifactId>
|
||||
<version>5.7.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- slf4j binding backed by log4j2; versions before 2.17.1 are affected by
     Log4Shell (CVE-2021-44228) and its follow-up CVEs, so do not downgrade -->
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-slf4j-impl</artifactId>
    <version>2.17.1</version>
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!--The default maven-surefire-plugin is outdated,
|
||||
make sure to update to the latest version.-->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>2.22.2</version>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.2.4</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<!-- the shade goal is executed in the package phase declared above -->
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.MF</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
<shadedArtifactAttached>true</shadedArtifactAttached>
|
||||
<shadedClassifierName>depds</shadedClassifierName>
|
||||
<transformers>
|
||||
<!-- ManifestResourceTransformer lets us set the Main-Class entry in the META-INF/MANIFEST.MF file of the shaded jar, so that the built jar can be run standalone -->
|
||||
<transformer
|
||||
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<!-- define some other information in the META-INF/MANIFEST.MF file inside the jar file -->
|
||||
<manifestEntries>
|
||||
<Main-Class>de.begerad.duprm.Main</Main-Class>
|
||||
<Specification-Title>${project.artifactId}</Specification-Title>
|
||||
<Specification-Version>${project.version}</Specification-Version>
|
||||
<Implementation-Title>${project.artifactId}</Implementation-Title>
|
||||
<Implementation-Version>${project.version}</Implementation-Version>
|
||||
<Implementation-Vendor-Id>${project.groupId}</Implementation-Vendor-Id>
|
||||
</manifestEntries>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
|
@ -0,0 +1,70 @@
|
|||
package de.begerad.duprm;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
|
||||
public class DuplicateCleaner {
|
||||
|
||||
public final static Logger LOG = LoggerFactory.getLogger(DuplicateCleaner.class);
|
||||
|
||||
public static void clean(final String pathInput, final String pathOutput) {
|
||||
LOG.debug("clean() started.");
|
||||
|
||||
String line;
|
||||
|
||||
//instantiating the Scanner class
|
||||
Scanner sc;
|
||||
try {
|
||||
sc = new Scanner(new File(pathInput));
|
||||
} catch (FileNotFoundException e) {
|
||||
LOG.error("clean() ERROR while accessing " + pathInput + " file");
|
||||
LOG.error("clean() stack trace: " + e);
|
||||
return;
|
||||
}
|
||||
|
||||
//instantiating the FileWriter class
|
||||
FileWriter writer;
|
||||
try {
|
||||
writer = new FileWriter(pathOutput);
|
||||
} catch (IOException e) {
|
||||
LOG.error("clean() ERROR while accessing " + pathOutput + " file");
|
||||
LOG.error("clean() stack trace: " + e);
|
||||
return;
|
||||
}
|
||||
|
||||
//instantiating the Set class
|
||||
Set<String> set = new HashSet<>();
|
||||
|
||||
while (sc.hasNextLine()) {
|
||||
line = sc.nextLine();
|
||||
if (set.add(line)) {
|
||||
try {
|
||||
writer.append(line).append(System.getProperty("line.separator"));
|
||||
} catch (IOException e) {
|
||||
LOG.error("clean() ERROR while appending data");
|
||||
LOG.error("clean() stack trace: " + e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
try {
|
||||
writer.flush();
|
||||
} catch (IOException e) {
|
||||
LOG.error("clean() ERROR while flushing data");
|
||||
LOG.error(e.getMessage());
|
||||
e.printStackTrace();
|
||||
return;
|
||||
}
|
||||
|
||||
LOG.debug("clean() done.");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package de.begerad.duprm;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import static de.begerad.duprm.DuplicateCleaner.clean;
|
||||
|
||||
public class Main {
|
||||
|
||||
public static String pathInput = "";
|
||||
|
||||
public static String pathOutput = "";
|
||||
|
||||
public final static Logger LOG = LoggerFactory.getLogger(Main.class);
|
||||
|
||||
public static void main(String[] args) {
|
||||
LOG.debug("main() started...");
|
||||
|
||||
//check user input for input path
|
||||
if (args.length < 1) {
|
||||
System.out.println("Please enter input path as first parameter.");
|
||||
return;
|
||||
}
|
||||
|
||||
pathInput = args[0];
|
||||
if (!(new File(pathInput).isFile())) {
|
||||
System.out.println("parameter: "
|
||||
+ pathInput
|
||||
+ " is NOT a valid path.");
|
||||
return;
|
||||
}
|
||||
|
||||
//check user input for output path
|
||||
if (args.length < 2) {
|
||||
System.out.println("Please enter output path as second parameter.");
|
||||
return;
|
||||
}
|
||||
|
||||
pathOutput = args[1];
|
||||
//call duplicate cleaner
|
||||
clean(pathInput, pathOutput);
|
||||
|
||||
LOG.debug("main() done.");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Configuration status="INFO">
|
||||
<Appenders>
|
||||
<RollingFile name="rollingFile"
             fileName="log.txt"
             filePattern="log-%d{yyyy-MM-dd}-%i.txt"
             ignoreExceptions="false">
    <PatternLayout>
        <Pattern>[%-5p] %d{yyyy-MM-dd HH:mm:ss.SSS} %c{1} %m%n</Pattern>
    </PatternLayout>
    <Policies>
        <SizeBasedTriggeringPolicy size="3MB" />
    </Policies>
    <!-- keep at most 5 size-triggered archives per date; this relies on the %i
         counter in filePattern, without it each rollover on the same day would
         overwrite the previous archive -->
    <DefaultRolloverStrategy max="5" />
</RollingFile>
|
||||
|
||||
<Console name="console" target="SYSTEM_OUT">
|
||||
<PatternLayout pattern="[%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} %c{1} - %msg%n" />
|
||||
</Console>
|
||||
</Appenders>
|
||||
<Loggers>
|
||||
<Logger name="de.begerad" level="debug" additivity="true">
|
||||
<appender-ref ref="rollingFile" level="debug" />
|
||||
</Logger>
|
||||
<Root level="debug" additivity="false">
|
||||
<appender-ref ref="console" />
|
||||
</Root>
|
||||
</Loggers>
|
||||
|
||||
</Configuration>
|
|
@ -0,0 +1,4 @@
|
|||
package de.begerad.duprm;
|
||||
|
||||
/**
 * Empty class without any members; presumably a placeholder for unit tests of
 * {@code DuplicateCleaner} (TODO: add test cases).
 */
public class TestDuplicateCleaner {
}
|
|
@ -0,0 +1,6 @@
|
|||
Hello how are you
|
||||
Hello how are you
|
||||
welcome to Tutorialspoint
|
||||
Hello how are you
|
||||
Hello how are you
|
||||
welcome to Tutorialspoint
|
Loading…
Reference in New Issue