From 9150753a2041aaa0caacc8b1def7c23cbcd9745d Mon Sep 17 00:00:00 2001 From: longphan98 Date: Wed, 25 May 2022 13:38:44 +0700 Subject: [PATCH 1/5] [TIKA-1800] Decode the escape characters in front of special characters --- .../main/java/org/apache/tika/mime/MediaType.java | 15 ++++++++++++--- .../java/org/apache/tika/mime/MediaTypeTest.java | 13 +++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/mime/MediaType.java b/tika-core/src/main/java/org/apache/tika/mime/MediaType.java index 13ad6ed9cd8..170431752fc 100644 --- a/tika-core/src/main/java/org/apache/tika/mime/MediaType.java +++ b/tika-core/src/main/java/org/apache/tika/mime/MediaType.java @@ -39,15 +39,15 @@ public final class MediaType implements Comparable, Serializable { */ private static final long serialVersionUID = -3831000556189036392L; - private static final Pattern SPECIAL = Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]"); + private static final Pattern SPECIAL = Pattern.compile("[()<>@,;:\\\\\"/\\[\\]?=]"); private static final Pattern SPECIAL_OR_WHITESPACE = - Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]"); + Pattern.compile("[()<>@,;:\\\\\"/\\[\\]?=\\s]"); /** * See http://www.ietf.org/rfc/rfc2045.txt for valid mime-type characters. */ - private static final String VALID_CHARS = "([^\\c\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]+)"; + private static final String VALID_CHARS = "([^\\c\\()<>@,;:\\\\\"/\\[\\]?=\\s]+)"; private static final Pattern TYPE_PATTERN = Pattern.compile("(?s)\\s*" + VALID_CHARS + "\\s*/\\s*" + VALID_CHARS + "\\s*($|;.*)"); @@ -331,6 +331,7 @@ private static Map parseParameters(String string) { /** * Fuzzy unquoting mechanism that works also with somewhat malformed * quotes. + * TIKA-1800: get rid of the escape characters which are in front of special characters for further usage (.e.g to parse on to a new MediaType as parameters) * * @param s string to unquote * @return unquoted string @@ -342,6 +343,14 @@ private static String unquote(String s) { while (s.endsWith("\"") || s.endsWith("'")) { s = s.substring(0, s.length() - 1); } + for (int i = 0; i < s.length() - 1; i++) { + if (s.charAt(i) == '\\' && !('0' <= s.charAt(i + 1) && s.charAt(i + 1) <= '9') && + !('a' <= s.charAt(i + 1) && s.charAt(i + 1) <= 'z') && + s.charAt(i + 1) != '-' && s.charAt(i + 1) != '+' && + s.charAt(i + 1) != '.' && s.charAt(i + 1) != '_') { + s = s.substring(0, i) + s.substring(i + 1); + } + } return s; } diff --git a/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java b/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java index 64a2bebf820..47aa52abf9e 100644 --- a/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java +++ b/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java @@ -21,13 +21,26 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.io.IOException; import java.util.HashMap; import java.util.Map; +import org.apache.tika.exception.TikaException; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; public class MediaTypeTest { + // TIKA-1800 + @Test + public void testEscapedSpecialChar() { + MediaType mType = new MediaType(MediaType.APPLICATION_XML, "x-report", "#report?"); + String cType = mType.toString(); + mType = MediaType.parse(cType); + String report = mType.getParameters().get("x-report"); + assertEquals("#report?", report); + } + @Test public void testBasics() { assertEquals("application/octet-stream", From bf956f9607ee10cbb5ff3d02679d371087b7c7d6 Mon Sep 17 00:00:00 2001 From: longphan98 Date: Wed, 25 May 2022 13:47:14 +0700 Subject: [PATCH 2/5] [TIKA-2392] Fix the rest of potential bugs and 2 new ones that may trigger NullPointerException + add test case --- .../java/org/apache/tika/gui/TikaGUI.java | 2 +- .../TikaFileTypeDetectorTest.java | 2 ++ .../tika/parser/ocr/TesseractOCRConfig.java | 2 +- .../parser/ocr/TesseractOCRConfigTest.java | 21 ++++++++++++------- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java index f23da189fae..72b610fbcf3 100644 --- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java +++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java @@ -610,7 +610,7 @@ public File requestSave(String embeddedName) throws IOException { int splitAt = embeddedName.lastIndexOf('.'); if (splitAt > 0) { - embeddedName.substring(splitAt); + embeddedName = embeddedName.substring(splitAt); } File tmp = File.createTempFile("tika-embedded-", suffix); diff --git a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java index b21b7b3587e..fed1908dc1e 100644 --- a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java +++ b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java @@ -47,9 +47,11 @@ public void setUp() throws Exception { testDirectory = tempDir; System.out.println(testDirectory.toAbsolutePath()); try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) { + assert is != null; Files.copy(is, testDirectory.resolve(TEST_HTML)); } try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) { + assert is != null; Files.copy(is, testDirectory.resolve(TEST_UNRECOGNISED_EXTENSION)); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java index 18359735ffc..a0173992c1f 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java @@ -410,7 +410,7 @@ public String getFilter() { * Default value is triangle. */ public void setFilter(String filter) { - if (filter.equals(null)) { + if (filter == null) { throw new IllegalArgumentException( "Filter value cannot be null. Valid values are point, hermite, " + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell."); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java index 6bd6ff0597a..1f12ae35c00 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java @@ -16,11 +16,6 @@ */ package org.apache.tika.parser.ocr; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - import java.io.InputStream; import java.util.Arrays; import java.util.List; @@ -31,6 +26,8 @@ import org.apache.tika.config.TikaConfig; import org.apache.tika.parser.CompositeParser; +import static org.junit.jupiter.api.Assertions.*; + public class TesseractOCRConfigTest extends TikaTest { @Test @@ -46,7 +43,7 @@ public void testNoConfig() throws Exception { assertEquals("gray", config.getColorspace(), "Invalid default colorpsace value"); assertEquals("triangle", config.getFilter(), "Invalid default filter value"); assertEquals(200, config.getResize(), "Invalid default resize value"); - assertEquals(false, config.isApplyRotation(), "Invalid default applyRotation value"); + assertFalse(config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -67,7 +64,7 @@ public void testPartialConfig() throws Exception { assertEquals(8, config.getDepth(), "Invalid overridden depth value"); assertEquals("box", config.getFilter(), "Invalid overridden filter value"); assertEquals(300, config.getResize(), "Invalid overridden resize value"); - assertEquals(false, config.isApplyRotation(), "Invalid default applyRotation value"); + assertFalse(config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -89,7 +86,7 @@ public void testFullConfig() throws Exception { assertEquals(8, config.getDepth(), "Invalid overridden depth value"); assertEquals("box", config.getFilter(), "Invalid overridden filter value"); assertEquals(300, config.getResize(), "Invalid overridden resize value"); - assertEquals(true, config.isApplyRotation(), "Invalid default applyRotation value"); + assertTrue(config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -250,6 +247,14 @@ public void testBadColorSpace() { }); } + @Test + public void testNullFilter() { + TesseractOCRConfig config = new TesseractOCRConfig(); + assertThrows(IllegalArgumentException.class, () -> { + config.setFilter(null); + }); + } + @Test public void testUpdatingConfigs() throws Exception { TesseractOCRConfig configA = new TesseractOCRConfig(); From 6bfee4a54298b32c3ed8e77472eeb40822843208 Mon Sep 17 00:00:00 2001 From: longphan98 Date: Wed, 25 May 2022 13:50:45 +0700 Subject: [PATCH 3/5] Revert "[TIKA-1800] Decode the escape characters in front of special characters" This reverts commit 9150753a2041aaa0caacc8b1def7c23cbcd9745d. --- .../main/java/org/apache/tika/mime/MediaType.java | 15 +++------------ .../java/org/apache/tika/mime/MediaTypeTest.java | 13 ------------- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/mime/MediaType.java b/tika-core/src/main/java/org/apache/tika/mime/MediaType.java index 170431752fc..13ad6ed9cd8 100644 --- a/tika-core/src/main/java/org/apache/tika/mime/MediaType.java +++ b/tika-core/src/main/java/org/apache/tika/mime/MediaType.java @@ -39,15 +39,15 @@ public final class MediaType implements Comparable, Serializable { */ private static final long serialVersionUID = -3831000556189036392L; - private static final Pattern SPECIAL = Pattern.compile("[()<>@,;:\\\\\"/\\[\\]?=]"); + private static final Pattern SPECIAL = Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=]"); private static final Pattern SPECIAL_OR_WHITESPACE = - Pattern.compile("[()<>@,;:\\\\\"/\\[\\]?=\\s]"); + Pattern.compile("[\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]"); /** * See http://www.ietf.org/rfc/rfc2045.txt for valid mime-type characters. */ - private static final String VALID_CHARS = "([^\\c\\()<>@,;:\\\\\"/\\[\\]?=\\s]+)"; + private static final String VALID_CHARS = "([^\\c\\(\\)<>@,;:\\\\\"/\\[\\]\\?=\\s]+)"; private static final Pattern TYPE_PATTERN = Pattern.compile("(?s)\\s*" + VALID_CHARS + "\\s*/\\s*" + VALID_CHARS + "\\s*($|;.*)"); @@ -331,7 +331,6 @@ private static Map parseParameters(String string) { /** * Fuzzy unquoting mechanism that works also with somewhat malformed * quotes. - * TIKA-1800: get rid of the escape characters which are in front of special characters for further usage (.e.g to parse on to a new MediaType as parameters) * * @param s string to unquote * @return unquoted string @@ -343,14 +342,6 @@ private static String unquote(String s) { while (s.endsWith("\"") || s.endsWith("'")) { s = s.substring(0, s.length() - 1); } - for (int i = 0; i < s.length() - 1; i++) { - if (s.charAt(i) == '\\' && !('0' <= s.charAt(i + 1) && s.charAt(i + 1) <= '9') && - !('a' <= s.charAt(i + 1) && s.charAt(i + 1) <= 'z') && - s.charAt(i + 1) != '-' && s.charAt(i + 1) != '+' && - s.charAt(i + 1) != '.' && s.charAt(i + 1) != '_') { - s = s.substring(0, i) + s.substring(i + 1); - } - } return s; } diff --git a/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java b/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java index 47aa52abf9e..64a2bebf820 100644 --- a/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java +++ b/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java @@ -21,26 +21,13 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.io.IOException; import java.util.HashMap; import java.util.Map; -import org.apache.tika.exception.TikaException; import org.junit.jupiter.api.Test; -import org.xml.sax.SAXException; public class MediaTypeTest { - // TIKA-1800 - @Test - public void testEscapedSpecialChar() { - MediaType mType = new MediaType(MediaType.APPLICATION_XML, "x-report", "#report?"); - String cType = mType.toString(); - mType = MediaType.parse(cType); - String report = mType.getParameters().get("x-report"); - assertEquals("#report?", report); - } - @Test public void testBasics() { assertEquals("application/octet-stream", From 7eb012b819c71672bdc01493db74e78cf9b5f034 Mon Sep 17 00:00:00 2001 From: longphan98 Date: Wed, 25 May 2022 13:52:44 +0700 Subject: [PATCH 4/5] Revert "[TIKA-2392] Fix the rest of potential bugs and 2 new ones that may trigger NullPointerException + add test case" This reverts commit bf956f9607ee10cbb5ff3d02679d371087b7c7d6. --- .../java/org/apache/tika/gui/TikaGUI.java | 2 +- .../TikaFileTypeDetectorTest.java | 2 -- .../tika/parser/ocr/TesseractOCRConfig.java | 2 +- .../parser/ocr/TesseractOCRConfigTest.java | 21 +++++++------------ 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java index 72b610fbcf3..f23da189fae 100644 --- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java +++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java @@ -610,7 +610,7 @@ public File requestSave(String embeddedName) throws IOException { int splitAt = embeddedName.lastIndexOf('.'); if (splitAt > 0) { - embeddedName = embeddedName.substring(splitAt); + embeddedName.substring(splitAt); } File tmp = File.createTempFile("tika-embedded-", suffix); diff --git a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java index fed1908dc1e..b21b7b3587e 100644 --- a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java +++ b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java @@ -47,11 +47,9 @@ public void setUp() throws Exception { testDirectory = tempDir; System.out.println(testDirectory.toAbsolutePath()); try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) { - assert is != null; Files.copy(is, testDirectory.resolve(TEST_HTML)); } try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) { - assert is != null; Files.copy(is, testDirectory.resolve(TEST_UNRECOGNISED_EXTENSION)); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java index a0173992c1f..18359735ffc 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java @@ -410,7 +410,7 @@ public String getFilter() { * Default value is triangle. */ public void setFilter(String filter) { - if (filter == null) { + if (filter.equals(null)) { throw new IllegalArgumentException( "Filter value cannot be null. Valid values are point, hermite, " + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell."); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java index 1f12ae35c00..6bd6ff0597a 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java @@ -16,6 +16,11 @@ */ package org.apache.tika.parser.ocr; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + import java.io.InputStream; import java.util.Arrays; import java.util.List; @@ -26,8 +31,6 @@ import org.apache.tika.config.TikaConfig; import org.apache.tika.parser.CompositeParser; -import static org.junit.jupiter.api.Assertions.*; - public class TesseractOCRConfigTest extends TikaTest { @Test @@ -43,7 +46,7 @@ public void testNoConfig() throws Exception { assertEquals("gray", config.getColorspace(), "Invalid default colorpsace value"); assertEquals("triangle", config.getFilter(), "Invalid default filter value"); assertEquals(200, config.getResize(), "Invalid default resize value"); - assertFalse(config.isApplyRotation(), "Invalid default applyRotation value"); + assertEquals(false, config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -64,7 +67,7 @@ public void testPartialConfig() throws Exception { assertEquals(8, config.getDepth(), "Invalid overridden depth value"); assertEquals("box", config.getFilter(), "Invalid overridden filter value"); assertEquals(300, config.getResize(), "Invalid overridden resize value"); - assertFalse(config.isApplyRotation(), "Invalid default applyRotation value"); + assertEquals(false, config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -86,7 +89,7 @@ public void testFullConfig() throws Exception { assertEquals(8, config.getDepth(), "Invalid overridden depth value"); assertEquals("box", config.getFilter(), "Invalid overridden filter value"); assertEquals(300, config.getResize(), "Invalid overridden resize value"); - assertTrue(config.isApplyRotation(), "Invalid default applyRotation value"); + assertEquals(true, config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -247,14 +250,6 @@ public void testBadColorSpace() { }); } - @Test - public void testNullFilter() { - TesseractOCRConfig config = new TesseractOCRConfig(); - assertThrows(IllegalArgumentException.class, () -> { - config.setFilter(null); - }); - } - @Test public void testUpdatingConfigs() throws Exception { TesseractOCRConfig configA = new TesseractOCRConfig(); From 59240db7545eb84b63a65ba25729b97505e274bf Mon Sep 17 00:00:00 2001 From: longphan98 Date: Wed, 25 May 2022 13:58:40 +0700 Subject: [PATCH 5/5] [TIKA-2392] Fix the rest of potential bugs and 2 new ones that may trigger NullPointerException + add test case --- .../java/org/apache/tika/gui/TikaGUI.java | 2 +- .../TikaFileTypeDetectorTest.java | 2 ++ .../tika/parser/ocr/TesseractOCRConfig.java | 2 +- .../parser/ocr/TesseractOCRConfigTest.java | 21 ++++++++++++------- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java index f23da189fae..72b610fbcf3 100644 --- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java +++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java @@ -610,7 +610,7 @@ public File requestSave(String embeddedName) throws IOException { int splitAt = embeddedName.lastIndexOf('.'); if (splitAt > 0) { - embeddedName.substring(splitAt); + embeddedName = embeddedName.substring(splitAt); } File tmp = File.createTempFile("tika-embedded-", suffix); diff --git a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java index b21b7b3587e..fed1908dc1e 100644 --- a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java +++ b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java @@ -47,9 +47,11 @@ public void setUp() throws Exception { testDirectory = tempDir; System.out.println(testDirectory.toAbsolutePath()); try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) { + assert is != null; Files.copy(is, testDirectory.resolve(TEST_HTML)); } try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) { + assert is != null; Files.copy(is, testDirectory.resolve(TEST_UNRECOGNISED_EXTENSION)); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java index 18359735ffc..a0173992c1f 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java @@ -410,7 +410,7 @@ public String getFilter() { * Default value is triangle. */ public void setFilter(String filter) { - if (filter.equals(null)) { + if (filter == null) { throw new IllegalArgumentException( "Filter value cannot be null. Valid values are point, hermite, " + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell."); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java index 6bd6ff0597a..1f12ae35c00 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java @@ -16,11 +16,6 @@ */ package org.apache.tika.parser.ocr; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - import java.io.InputStream; import java.util.Arrays; import java.util.List; @@ -31,6 +26,8 @@ import org.apache.tika.config.TikaConfig; import org.apache.tika.parser.CompositeParser; +import static org.junit.jupiter.api.Assertions.*; + public class TesseractOCRConfigTest extends TikaTest { @Test @@ -46,7 +43,7 @@ public void testNoConfig() throws Exception { assertEquals("gray", config.getColorspace(), "Invalid default colorpsace value"); assertEquals("triangle", config.getFilter(), "Invalid default filter value"); assertEquals(200, config.getResize(), "Invalid default resize value"); - assertEquals(false, config.isApplyRotation(), "Invalid default applyRotation value"); + assertFalse(config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -67,7 +64,7 @@ public void testPartialConfig() throws Exception { assertEquals(8, config.getDepth(), "Invalid overridden depth value"); assertEquals("box", config.getFilter(), "Invalid overridden filter value"); assertEquals(300, config.getResize(), "Invalid overridden resize value"); - assertEquals(false, config.isApplyRotation(), "Invalid default applyRotation value"); + assertFalse(config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -89,7 +86,7 @@ public void testFullConfig() throws Exception { assertEquals(8, config.getDepth(), "Invalid overridden depth value"); assertEquals("box", config.getFilter(), "Invalid overridden filter value"); assertEquals(300, config.getResize(), "Invalid overridden resize value"); - assertEquals(true, config.isApplyRotation(), "Invalid default applyRotation value"); + assertTrue(config.isApplyRotation(), "Invalid default applyRotation value"); } @Test @@ -250,6 +247,14 @@ public void testBadColorSpace() { }); } + @Test + public void testNullFilter() { + TesseractOCRConfig config = new TesseractOCRConfig(); + assertThrows(IllegalArgumentException.class, () -> { + config.setFilter(null); + }); + } + @Test public void testUpdatingConfigs() throws Exception { TesseractOCRConfig configA = new TesseractOCRConfig();