Coverage Summary for Class: QueryURLImport (org.kitodo.queryurlimport)

Class | Class, % | Method, % | Line, %
QueryURLImport | 100% (1/1) | 34,8% (8/23) | 22,3% (54/242)


 /*
  * (c) Kitodo. Key to digital objects e. V. <contact@kitodo.org>
  *
  * This file is part of the Kitodo project.
  *
  * It is licensed under GNU General Public License version 3 or later.
  *
  * For the full copyright and license information, please read the
  * GPL3-License.txt file that was distributed with this source code.
  */
 
 package org.kitodo.queryurlimport;
 
 import static org.apache.http.HttpStatus.SC_OK;
 
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URLEncoder;
 import java.net.UnknownHostException;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.stream.Collectors;
 
 import javax.xml.XMLConstants;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerException;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.net.ftp.FTPClient;
 import org.apache.commons.net.ftp.FTPFile;
 import org.apache.commons.net.ftp.FTPFileFilter;
 import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
 import org.apache.http.auth.AuthScope;
 import org.apache.http.auth.UsernamePasswordCredentials;
 import org.apache.http.client.ClientProtocolException;
 import org.apache.http.client.CredentialsProvider;
 import org.apache.http.client.config.RequestConfig;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.utils.URLEncodedUtils;
 import org.apache.http.conn.ConnectTimeoutException;
 import org.apache.http.impl.client.BasicCredentialsProvider;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClientBuilder;
 import org.apache.http.message.BasicNameValuePair;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.kitodo.api.externaldatamanagement.DataImport;
 import org.kitodo.api.externaldatamanagement.ExternalDataImportInterface;
 import org.kitodo.api.externaldatamanagement.SearchInterfaceType;
 import org.kitodo.api.externaldatamanagement.SearchResult;
 import org.kitodo.api.externaldatamanagement.SingleHit;
 import org.kitodo.api.schemaconverter.DataRecord;
 import org.kitodo.api.schemaconverter.FileFormat;
 import org.kitodo.api.schemaconverter.MetadataFormat;
 import org.kitodo.exceptions.CatalogException;
 import org.kitodo.exceptions.ConfigException;
 import org.kitodo.exceptions.NoRecordFoundException;
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 
 public class QueryURLImport implements ExternalDataImportInterface {
 
     private static final Logger logger = LogManager.getLogger(QueryURLImport.class);
     private static final String MODS_RECORD_TAG = "mods";
     private static final String HTTP_PROTOCOL = "http";
     private static final String HTTPS_PROTOCOL = "https";
     private static final String FTP_PROTOCOL = "ftp";
     private static final String EQUALS_OPERAND = "=";
     private static final String AND = "&";
     private static final String OAI_IDENTIFIER = "identifier";
     private final Charset encoding = StandardCharsets.UTF_8;
 
     private CloseableHttpClient httpClient = HttpClientBuilder.create().build();
     private final FTPClient ftpClient = new FTPClient();
 
     @Override
     public DataRecord getFullRecordById(DataImport dataImport, String identifier) throws NoRecordFoundException {
         LinkedHashMap<String, String> queryParameters = new LinkedHashMap<>(dataImport.getUrlParameters());
         SearchInterfaceType interfaceType = dataImport.getSearchInterfaceType();
         try {
             if (SearchInterfaceType.FTP.equals(interfaceType)) {
                 return performFTPQueryToRecord(dataImport, identifier);
             } else {
                 URI queryURL = createQueryURI(dataImport, queryParameters);
                 return performQueryToRecord(dataImport, queryURL.toString(), identifier);
             }
         } catch (URISyntaxException e) {
             throw new ConfigException(e.getLocalizedMessage());
         }
     }
 
     @Override
     public List<DataRecord> getMultipleFullRecordsFromQuery(DataImport dataImport, String field, String value,
                                                             int rows) {
         HashMap<String, String> searchFields = new HashMap<>();
         searchFields.put(field, value);
         if (dataImport.getSearchFields().containsKey(field)) {
             // Query parameters for HTTP request
             LinkedHashMap<String, String> queryParameters = new LinkedHashMap<>(dataImport.getUrlParameters());
             // Search fields and terms of query
             LinkedHashMap<String, String> searchFieldMap = getSearchFieldMap(dataImport, searchFields);
 
             try {
                 URI queryURL = createQueryURI(dataImport, queryParameters);
                 String queryString = queryURL + AND;
                 SearchInterfaceType interfaceType = dataImport.getSearchInterfaceType();
                 if (Objects.nonNull(interfaceType)) {
                     if (Objects.nonNull(interfaceType.getStartRecordString())
                             && Objects.nonNull(interfaceType.getDefaultStartValue())) {
                         queryString = queryString + interfaceType.getStartRecordString() + EQUALS_OPERAND
                                 + interfaceType.getDefaultStartValue() + AND;
                     }
                     if (Objects.nonNull(interfaceType.getMaxRecordsString())) {
                         queryString = queryString + interfaceType.getMaxRecordsString() + EQUALS_OPERAND + rows + AND;
                     }
                     if (Objects.nonNull(interfaceType.getQueryString())) {
                         queryString = queryString + interfaceType.getQueryString() + EQUALS_OPERAND;
                     }
                 }
                 queryString = queryString + createSearchFieldString(interfaceType, searchFieldMap);
                 return performQueryToMultipleRecords(dataImport, queryString);
             } catch (URISyntaxException | IOException | ParserConfigurationException | SAXException
                     | TransformerException e) {
                 logger.error(e.getLocalizedMessage());
             }
         }
         return Collections.emptyList();
     }
 
     @Override
     public SearchResult search(DataImport dataImport, String field, String term, int rows) {
         return search(dataImport, field, term, 1, rows);
     }
 
     @Override
     public SearchResult search(DataImport dataImport, String key, String value, int start, int numberOfRecords) {
         switch (dataImport.getScheme()) {
             case FTP_PROTOCOL:
                 return performFtpRequest(dataImport, value, start, numberOfRecords);
             case HTTP_PROTOCOL:
             case HTTPS_PROTOCOL:
                 if (dataImport.getSearchFields().containsKey(key)
                         || SearchInterfaceType.OAI.equals(dataImport.getSearchInterfaceType())) {
                     return performHTTPRequest(dataImport, Collections.singletonMap(key, value),
                             start, numberOfRecords);
                 }
                 return null;
             default:
                 throw new CatalogException("Error: unknown protocol '" + dataImport.getScheme()
                         + "' configured in import configuration '" + dataImport.getTitle()
                         + "' (supported protocols are http, https and ftp)!");
         }
     }
 
     @Override
     public Collection<SingleHit> getMultipleEntriesById(Collection<String> ids, String catalogId) {
         return Collections.emptyList();
     }
 
     private void reinitializeHttpClient(String username, String password) throws IOException {
         httpClient.close();
         if (StringUtils.isNotBlank(username) && StringUtils.isNotBlank(password)) {
             CredentialsProvider provider = new BasicCredentialsProvider();
             UsernamePasswordCredentials credentials = new UsernamePasswordCredentials(username, password);
             provider.setCredentials(AuthScope.ANY, credentials);
             httpClient = HttpClientBuilder.create().setDefaultCredentialsProvider(provider).build();
         } else {
             httpClient = HttpClientBuilder.create().build();
         }
     }
 
     private SearchResult performQuery(DataImport dataImport, String queryURL) {
         try {
             this.reinitializeHttpClient(dataImport.getUsername(), dataImport.getPassword());
             logger.debug("Requesting: {}", queryURL);
             HttpResponse response = httpClient.execute(new HttpGet(queryURL));
             int responseStatusCode = response.getStatusLine().getStatusCode();
             if (Objects.equals(responseStatusCode, SC_OK)) {
                 return XmlResponseHandler.getSearchResult(response, dataImport);
             } else {
                 throw new CatalogException(response.getStatusLine().getReasonPhrase() + " (Http status code "
                         + responseStatusCode + ")");
             }
         } catch (UnknownHostException e) {
             throw new CatalogException("Unknown host: " + e.getMessage());
         } catch (ClientProtocolException e) {
             throw new CatalogException("ClientProtocolException: " + e.getMessage());
         } catch (IOException e) {
             throw new CatalogException(e.getLocalizedMessage());
         }
     }
 
     private DataRecord performFTPQueryToRecord(DataImport dataImport, String filename) {
         if (StringUtils.isBlank(dataImport.getHost()) || StringUtils.isBlank(dataImport.getPath())) {
             throw new CatalogException("Missing host or path configuration for FTP import in OPAC configuration "
                     + "for import configuration '" + dataImport.getTitle() + "'");
         }
         if (StringUtils.isBlank(dataImport.getUsername()) || StringUtils.isBlank(dataImport.getPassword())) {
             throw new CatalogException("Incomplete credentials configured for FTP import in OPAC configuration "
                     + "for import configuration '" + dataImport.getTitle() + "'");
         }
         try {
             ftpLogin(dataImport);
             String filepath = dataImport.getPath() + "/" + filename;
             InputStream inputStream = ftpClient.retrieveFileStream(filepath);
             if (Objects.isNull(inputStream)) {
                 throw new CatalogException("Unable to load file '" + filepath + "' from configured FTP source!");
             }
             String stringContent = IOUtils.toString(inputStream, Charset.defaultCharset());
             inputStream.close();
             DataRecord dataRecord = createRecordFromXMLElement(dataImport, stringContent);
             if (!ftpClient.completePendingCommand()) {
                 throw new CatalogException("Unable to import '" + filename + "'!");
             }
             ftpLogout();
             return dataRecord;
         } catch (IOException e) {
             throw new CatalogException(e.getLocalizedMessage());
         } finally {
             if (ftpClient.isConnected()) {
                 try {
                     ftpClient.disconnect();
                 } catch (IOException e) {
                     logger.error(e.getMessage());
                 }
             }
         }
     }
 
     private DataRecord performQueryToRecord(DataImport dataImport, String queryURL, String identifier)
             throws NoRecordFoundException {
         String fullUrl = queryURL;
         if (!dataImport.getUrlParameters().isEmpty()) {
             fullUrl = fullUrl + AND;
         }
         SearchInterfaceType interfaceType = dataImport.getSearchInterfaceType();
         if (Objects.nonNull(interfaceType)) {
             if (Objects.nonNull(interfaceType.getMaxRecordsString())) {
                 fullUrl = fullUrl + interfaceType.getMaxRecordsString() + EQUALS_OPERAND + "1&";
             }
             if (Objects.nonNull(interfaceType.getQueryString())) {
                 fullUrl = fullUrl + interfaceType.getQueryString() + EQUALS_OPERAND;
             }
         }
         String idPrefix = dataImport.getIdPrefix();
         String prefix = Objects.nonNull(idPrefix) && !identifier.startsWith(idPrefix) ? idPrefix : "";
         String idParameter = SearchInterfaceType.OAI.equals(dataImport.getSearchInterfaceType()) ? OAI_IDENTIFIER
                 : dataImport.getIdParameter();
         String queryParameter = idParameter + EQUALS_OPERAND + prefix + identifier;
         if (SearchInterfaceType.SRU.equals(interfaceType)) {
             fullUrl += URLEncoder.encode(queryParameter, encoding);
         } else {
             fullUrl += queryParameter;
         }
         try {
             this.reinitializeHttpClient(dataImport.getUsername(), dataImport.getPassword());
             logger.debug("Requesting: {}", fullUrl);
             HttpResponse response = httpClient.execute(new HttpGet(fullUrl));
             if (Objects.equals(response.getStatusLine().getStatusCode(), SC_OK)) {
                 HttpEntity httpEntity = response.getEntity();
                 if (Objects.isNull(httpEntity)) {
                     throw new NoRecordFoundException("No record with ID \"" + identifier + "\" found!");
                 }
                 try (InputStream inputStream = httpEntity.getContent()) {
                     String content = IOUtils.toString(inputStream, Charset.defaultCharset());
                     if (Objects.nonNull(interfaceType.getNumberOfRecordsString())
                             && XmlResponseHandler.extractNumberOfRecords(content, interfaceType) < 1) {
                         throw new NoRecordFoundException("No record with ID \"" + identifier + "\" found!");
                     }
                     return createRecordFromXMLElement(dataImport, content);
                 }
             }
             throw new ConfigException("Search Query Request Failed");
         } catch (IOException e) {
             throw new ConfigException(e.getLocalizedMessage());
         }
     }
 
 
     private List<DataRecord> performQueryToMultipleRecords(DataImport dataImport, String queryURL)
             throws IOException, ParserConfigurationException, SAXException, TransformerException {
         List<DataRecord> records = new LinkedList<>();
         HttpGet request = new HttpGet(queryURL);
         RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
         requestConfigBuilder.setConnectionRequestTimeout(3000);
         requestConfigBuilder.setConnectTimeout(3000);
         request.setConfig(requestConfigBuilder.build());
         try {
             logger.debug("Requesting: {}", queryURL);
             HttpResponse response = httpClient.execute(request);
             int responseStatusCode = response.getStatusLine().getStatusCode();
             if (Objects.equals(responseStatusCode, SC_OK)) {
                 String xmlContent = IOUtils.toString(response.getEntity().getContent(), Charset.defaultCharset());
                 Document document = stringToDocument(xmlContent);
                 NodeList recordNodes = document.getElementsByTagName(MODS_RECORD_TAG);
                 for (int i = 0; i < recordNodes.getLength(); i++) {
                     records.add(createRecordFromXMLElement(dataImport, nodeToString(recordNodes.item(i))));
                 }
             } else {
                 throw new CatalogException(response.getStatusLine().getReasonPhrase() + " (Http status code "
                         + responseStatusCode + ")");
             }
         } catch (ConnectTimeoutException e) {
             throw new CatalogException("Connection exception: OPAC did not respond within the configured time limit!");
         }
         return records;
     }
 
     private SearchResult performHTTPRequest(DataImport dataImport, Map<String, String> searchParameters,
                                             int start, int numberOfRecords) {
         // Query parameters for search request
         LinkedHashMap<String, String> queryParameters = new LinkedHashMap<>(dataImport.getUrlParameters());
         // Search fields and terms of query
         LinkedHashMap<String, String> searchFieldMap = getSearchFieldMap(dataImport, searchParameters);
         SearchInterfaceType interfaceType = dataImport.getSearchInterfaceType();
         try {
             URI queryURL = createQueryURI(dataImport, queryParameters);
             String queryString = queryURL + AND;
             if (Objects.nonNull(interfaceType)) {
                 if (start > 0 && Objects.nonNull(interfaceType.getStartRecordString())) {
                     queryString += interfaceType.getStartRecordString() + EQUALS_OPERAND + start + AND;
                 }
                 if (Objects.nonNull(interfaceType.getMaxRecordsString())) {
                     queryString = queryString + interfaceType.getMaxRecordsString() + EQUALS_OPERAND + numberOfRecords
                             + AND;
                 }
                 if (Objects.nonNull(interfaceType.getQueryString())) {
                     queryString = queryString + interfaceType.getQueryString() + EQUALS_OPERAND;
                 }
             }
             return performQuery(dataImport, queryString + createSearchFieldString(interfaceType, searchFieldMap));
         } catch (URISyntaxException e) {
             throw new CatalogException(e.getLocalizedMessage());
         }
     }
 
     private SearchResult performFtpRequest(DataImport dataImport, String filenamePart, int startIndex, int rows) {
         if (StringUtils.isBlank(dataImport.getUsername()) || StringUtils.isBlank(dataImport.getPassword())) {
             throw new CatalogException("Incomplete credentials configured for FTP import in import configuration '"
                     + dataImport.getTitle() + "'");
         }
         SearchResult searchResult = new SearchResult();
         FTPFileFilter searchFilter = file -> file.isFile() && file.getName().contains(filenamePart);
         try {
             ftpLogin(dataImport);
             FTPFile[] files = ftpClient.listFiles(dataImport.getPath(), searchFilter);
             searchResult.setNumberOfHits(files.length);
             LinkedList<SingleHit> hits = new LinkedList<>();
             for (int i = startIndex; i < Math.min(startIndex + rows, files.length); i++) {
                 hits.add(new SingleHit(files[i].getName(), files[i].getName()));
             }
             searchResult.setHits(hits);
             ftpLogout();
         } catch (IOException e) {
             throw new CatalogException(e.getMessage());
         } finally {
             if (ftpClient.isConnected()) {
                 try {
                     ftpClient.disconnect();
                 } catch (IOException e) {
                     logger.error(e.getMessage());
                 }
             }
         }
         return searchResult;
     }
 
     private DataRecord createRecordFromXMLElement(DataImport dataImport, String xmlContent) {
         DataRecord record = new DataRecord();
         record.setMetadataFormat(MetadataFormat.getMetadataFormat(dataImport.getMetadataFormat().name()));
         record.setFileFormat(FileFormat.getFileFormat(dataImport.getReturnFormat().name()));
         record.setOriginalData(xmlContent);
         return record;
     }
 
     private URI createQueryURI(DataImport dataImport, LinkedHashMap<String, String> searchFields)
             throws URISyntaxException {
         if (dataImport.getPort() > 0) {
             return new URI(dataImport.getScheme(), null, dataImport.getHost(), dataImport.getPort(),
                     dataImport.getPath(), createQueryParameterString(searchFields), null);
         } else {
             return new URI(dataImport.getScheme(), dataImport.getHost(), dataImport.getPath(),
                     createQueryParameterString(searchFields), null);
         }
     }
 
     private String createQueryParameterString(LinkedHashMap<String, String> searchFields) {
         List<BasicNameValuePair> nameValuePairList = searchFields.entrySet().stream()
                 .map(entry -> new BasicNameValuePair(entry.getKey(), entry.getValue()))
                 .collect(Collectors.toList());
         return URLEncodedUtils.format(nameValuePairList, StandardCharsets.UTF_8);
     }
 
     private String createSearchFieldString(SearchInterfaceType interfaceType, LinkedHashMap<String, String> searchFields) {
         List<String> searchOperands = searchFields.entrySet().stream()
                 .map(entry -> entry.getKey() + EQUALS_OPERAND + entry.getValue())
                 .collect(Collectors.toList());
         String searchString = String.join(" AND ", searchOperands);
         if (SearchInterfaceType.SRU.equals(interfaceType)) {
             return URLEncoder.encode(searchString, encoding);
         } else {
             return searchString;
         }
     }
 
     private Document stringToDocument(String xmlContent) throws ParserConfigurationException, IOException,
             SAXException {
         DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
         documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
         DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
         return documentBuilder.parse(new InputSource(new StringReader(xmlContent)));
     }
 
     private String nodeToString(Node node) throws TransformerException {
         StringWriter writer = new StringWriter();
         Transformer transformer = TransformerFactory.newInstance().newTransformer();
         transformer.transform(new DOMSource(node), new StreamResult(writer));
         return writer.toString();
     }
 
     private LinkedHashMap<String, String> getSearchFieldMap(DataImport dataImport, Map<String, String> searchFields) {
         LinkedHashMap<String, String> searchFieldMap = new LinkedHashMap<>();
         String idPrefix = dataImport.getIdPrefix();
         if (SearchInterfaceType.OAI.equals(dataImport.getSearchInterfaceType()) && searchFields.size() == 1) {
             String value = new LinkedList<>(searchFields.values()).getFirst();
             if (StringUtils.isBlank(idPrefix) || value.startsWith(idPrefix)) {
                 searchFieldMap.put(OAI_IDENTIFIER, value);
             } else {
                 searchFieldMap.put(OAI_IDENTIFIER, idPrefix + value);
             }
             return searchFieldMap;
         }
         String idParameter = dataImport.getIdParameter();
         for (Map.Entry<String, String> entry : searchFields.entrySet()) {
             String searchField = dataImport.getSearchFields().get(entry.getKey());
             if (StringUtils.isNotBlank(idPrefix) && StringUtils.isNotBlank(idParameter)
                     && idParameter.equals(searchField) && !entry.getValue().startsWith(idPrefix)) {
                 searchFieldMap.put(searchField, idPrefix + entry.getValue());
             } else {
                 searchFieldMap.put(searchField, entry.getValue());
             }
         }
         return searchFieldMap;
     }
 
     private void ftpLogin(DataImport dataImport) throws IOException {
         if (dataImport.getPort() > 0) {
             ftpClient.connect(dataImport.getHost(), dataImport.getPort());
         } else {
             ftpClient.connect(dataImport.getHost());
         }
         boolean loginSuccessful = ftpClient.login(dataImport.getUsername(), dataImport.getPassword());
         if (!loginSuccessful) {
             String replyString = ftpClient.getReplyString();
             int replyCode = ftpClient.getReplyCode();
             ftpClient.logout();
             ftpClient.disconnect();
             throw new CatalogException("FTP server login failed: " + replyString + " (" + replyCode + ")");
         }
     }
 
     private void ftpLogout() throws IOException {
         ftpClient.logout();
         ftpClient.disconnect();
     }
 }