Skip to content
This repository has been archived by the owner on Jul 3, 2023. It is now read-only.

ANY23-610 -- Upgrade Tika to 2.8.0 #320

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.any23.cli;

import org.junit.Ignore;
import org.junit.Test;

/**
Expand All @@ -31,6 +32,7 @@ public MimeDetectorTest() {
}

@Test
@Ignore("url now broken; redirects to https://twitter.com")
public void testDetectURL() throws Exception {
assumeOnlineAllowed();
runToolCheckExit0("http://twitter.com#micmos");
Expand Down
2 changes: 2 additions & 0 deletions cli/src/test/java/org/apache/any23/cli/RoverTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.any23.util.URLUtils;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Test;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.rio.RDFFormat;
Expand Down Expand Up @@ -112,6 +113,7 @@ public void testDelegatingWriterFactory() throws Exception {
/* BEGIN: online tests. */

@Test
@Ignore("urls no longer work as expected")
public void testRunMultiURLs() throws Exception {
// First, assume that remote resources are accessible.
assumeOnlineAllowed();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.SafeConstructor;

Expand All @@ -42,7 +43,7 @@ public class YAMLExtractor implements Extractor.ContentExtractor {

private final Logger log = LoggerFactory.getLogger(getClass());

private static final Yaml yml = new Yaml(new SafeConstructor());
private static final Yaml yml = new Yaml(new SafeConstructor(new LoaderOptions()));

private static final YAML vocab = YAML.getInstance();

Expand Down
2 changes: 2 additions & 0 deletions core/src/test/java/org/apache/any23/Any23Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.any23.writer.TripleHandlerException;
import org.apache.commons.io.IOUtils;
import org.junit.AssumptionViolatedException;
import org.junit.Ignore;
import org.junit.Test;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.repository.Repository;
Expand Down Expand Up @@ -286,6 +287,7 @@ protected int getSoTimeout() {
* if there is an error defining input URI's
*/
@Test
@Ignore("url returns 404")
public void testGZippedContent() throws IOException, URISyntaxException, ExtractionException {
assumeOnlineAllowed();
final Any23 runner = new Any23();
Expand Down
68 changes: 58 additions & 10 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,10 @@
<semargl.version>0.7</semargl.version>
<log4j2.version>2.17.2</log4j2.version>
<slf4j.version>1.7.36</slf4j.version>
<tika.version>2.4.0</tika.version>
<tika.version>2.8.0</tika.version>
<openie_2.11.version>4.2.6</openie_2.11.version>
<openregex.version>1.1.1</openregex.version>
<jackson.version>2.13.1</jackson.version>
<jackson.version>2.14.2</jackson.version>
<commons-io.version>2.11.0</commons-io.version>
<velocity.version>1.7</velocity.version>

Expand Down Expand Up @@ -351,6 +351,16 @@
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.10.0</version>
</dependency>
<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
Expand Down Expand Up @@ -389,14 +399,43 @@
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.1</version>
<version>1.16.1</version>
</dependency>
<dependency>
<groupId>net.sf.biweekly</groupId>
<artifactId>biweekly</artifactId>
<version>0.6.6</version>
</dependency>

<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk18on</artifactId>
<version>1.73</version>
</dependency>
<dependency>
<groupId>org.osgi</groupId>
<artifactId>osgi.annotation</artifactId>
<version>8.1.0</version>
</dependency>
<dependency>
<groupId>org.osgi</groupId>
<artifactId>org.osgi.util.function</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
<!-- BEGIN: Tika -->
<dependency>
<groupId>org.apache.tika</groupId>
Expand Down Expand Up @@ -656,12 +695,6 @@
</dependency>
<!-- END: logger -->

<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
<version>1.30</version>
</dependency>

<!-- BEGIN: Test Dependencies -->
<dependency>
<groupId>junit</groupId>
Expand Down Expand Up @@ -837,6 +870,21 @@
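<!-- Remove once we upgrade to org.apache.tika:tika-parsers-standard-package:jar:2.3.0. NOTE(review): tika.version is already beyond 2.3.0; confirm whether this exclusion is still needed. -->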
<exclude>af5f9c26-c09d-401f-a7fc-0785eeabeab3</exclude>
</excludeVulnerabilityIds>
<excludeCoordinates>
<!-- https://ossindex.sonatype.org/component/pkg:maven/com.google.guava/guava@30.1.1-jre?utm_source=ossindex-client&utm_medium=integration&utm_content=1.8.1 -->
<exclude>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>30.1.1-jre</version>
</exclude>
<exclude>
<!-- https://ossindex.sonatype.org/component/pkg:maven/xerces/xercesImpl@2.12.2?utm_source=ossindex-client&utm_medium=integration&utm_content=1.8.1 -->
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.12.2</version>
</exclude>
</excludeCoordinates>

</configuration>
<executions>
<execution>
Expand Down