Coverage Summary for Class: MetsXmlElementAccess (org.kitodo.dataformat.access)
Class |
Class, %
|
Method, %
|
Line, %
|
MetsXmlElementAccess |
100%
(1/1)
|
100%
(23/23)
|
91,7%
(155/169)
|
/*
* (c) Kitodo. Key to digital objects e. V. <contact@kitodo.org>
*
* This file is part of the Kitodo project.
*
* It is licensed under GNU General Public License version 3 or later.
*
* For the full copyright and license information, please read the
* GPL3-License.txt file that was distributed with this source code.
*/
package org.kitodo.dataformat.access;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.kitodo.api.dataformat.MediaVariant;
import org.kitodo.api.dataformat.PhysicalDivision;
import org.kitodo.api.dataformat.ProcessingNote;
import org.kitodo.api.dataformat.Workpiece;
import org.kitodo.api.dataformat.mets.MetsXmlElementAccessInterface;
import org.kitodo.dataformat.metskitodo.DivType;
import org.kitodo.dataformat.metskitodo.FileType;
import org.kitodo.dataformat.metskitodo.Mets;
import org.kitodo.dataformat.metskitodo.MetsType;
import org.kitodo.dataformat.metskitodo.MetsType.FileSec;
import org.kitodo.dataformat.metskitodo.MetsType.FileSec.FileGrp;
import org.kitodo.dataformat.metskitodo.MetsType.MetsHdr;
import org.kitodo.dataformat.metskitodo.MetsType.MetsHdr.Agent;
import org.kitodo.dataformat.metskitodo.MetsType.MetsHdr.MetsDocumentID;
import org.kitodo.dataformat.metskitodo.MetsType.StructLink;
import org.kitodo.dataformat.metskitodo.StructLinkType.SmLink;
import org.kitodo.dataformat.metskitodo.StructMapType;
import org.kitodo.utils.JAXBContextCache;
/**
 * The administrative structure of the product of an element that passes through
 * a Production workflow. The file format for this management structure is METS
 * XML according to the ZVDD DFG Viewer Application Profile.
 *
 * <p>
 * A {@code Workpiece} has two essential characteristics: its
 * {@code PhysicalDivision}s (see {@link FileXmlElementAccess}) and an outline
 * (see {@link DivXmlElementAccess}). {@code PhysicalDivision}s represent every
 * single digital medium on a conceptual level, such as the individual pages of
 * a book. Each {@code PhysicalDivision} can exist in different
 * {@code MediaVariant}s (see {@link UseXmlAttributeAccess}), for example in
 * different resolutions or file formats. Each {@code MediaVariant} of a
 * {@code PhysicalDivision} resides in a file in the data store (see
 * {@link FLocatXmlElementAccess}).
 *
 * <p>
 * The {@code LogicalDivision} is a tree structure that can be finely
 * subdivided, e.g. a book into its chapters, and those into individual elements
 * such as tables or figures. Each outline level points to the
 * {@code PhysicalDivision}s that belong to it via {@code View}s (see
 * {@link AreaXmlElementAccess}). Currently, a {@code View} always contains
 * exactly one {@code PhysicalDivision}; the indirection is provided as a simple
 * extension point, so that in a future version excerpts from
 * {@code PhysicalDivision}s can be described. Each outline level can be
 * described with any metadata (see {@link MetadataXmlElementsAccess}).
 *
 * @see "https://www.zvdd.de/fileadmin/AGSDD-Redaktion/METS_Anwendungsprofil_2.0.pdf"
 */
public class MetsXmlElementAccess implements MetsXmlElementAccessInterface {
private static final Logger logger = LogManager.getLogger(MetsXmlElementAccess.class);
/**
* The data object of this mets XML element access.
*/
private final Workpiece workpiece;
/**
 * Creates an access object backed by a new, empty workpiece. This is the
 * default state when the editor starts; from here a file can either be
 * loaded or a new one created.
 */
public MetsXmlElementAccess() {
    this.workpiece = new Workpiece();
}
/**
 * Creates a workpiece from a METS XML structure. The constructor starts
 * from an empty workpiece and then fills it from the given METS object.
 * Due to limitations of the API it is not public: it is used by
 * {@link #read(InputStream)}, whose result then replaces the content of the
 * current editor.
 *
 * @param mets
 *            METS XML structure to read
 */
private MetsXmlElementAccess(Mets mets) {
    // start with the empty workpiece of the default constructor ...
    this();
    // ... then populate it from the METS document
    initialize(mets);
}
/**
 * Populates the workpiece of this instance from a parsed METS document.
 * The header data is read first, then the physical structure, then the
 * struct link section is evaluated, and finally the logical structure is
 * built from the first struct map of type {@code LOGICAL}.
 *
 * @param mets
 *            METS document to read. If it has no struct link section, an
 *            empty one is set on it.
 * @throws java.util.NoSuchElementException
 *             if the METS document contains no struct map of type
 *             {@code LOGICAL}
 */
private void initialize(Mets mets) {
    setWorkpieceFromMetsHeader(mets);
    Map<String, FileXmlElementAccess> divIDsToPhysicalDivisions = getReferenceDivIdsToPhysicalDivisions(mets);
    if (mets.getStructLink() == null) {
        mets.setStructLink(new StructLink());
    }

    // Group the linked physical divisions by the ID of the logical div that
    // references them (smLink "from" -> list of "to" targets).
    Map<String, List<FileXmlElementAccess>> physicalDivisionsMap = new HashMap<>();
    for (Object smLinkOrSmLinkGrp : mets.getStructLink().getSmLinkOrSmLinkGrp()) {
        if (smLinkOrSmLinkGrp instanceof SmLink) {
            SmLink smLink = (SmLink) smLinkOrSmLinkGrp;
            // NOTE(review): an smLink whose target ID is not part of the
            // physical struct map yields a null list entry here, as before.
            physicalDivisionsMap.computeIfAbsent(smLink.getFrom(), any -> new LinkedList<>())
                    .add(divIDsToPhysicalDivisions.get(smLink.getTo()));
        }
    }

    // Only the first LOGICAL struct map is used, so pick it before building
    // the (potentially large) division tree instead of building all of them.
    StructMapType logicalStructMap = getStructMapsStreamByType(mets, "LOGICAL").findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException(
                    "METS document contains no <structMap> of TYPE \"LOGICAL\""));
    workpiece.setLogicalStructure(
        new DivXmlElementAccess(logicalStructMap.getDiv(), mets, physicalDivisionsMap, 1));
}
/**
 * Reads the physical structure from the METS document, stores its root as
 * the physical structure of the workpiece and returns a lookup table from
 * the IDs of the physical {@code <div>}s to their access objects.
 *
 * @param mets
 *            METS document to read
 * @return map from {@code <div>} ID to access object; empty if the document
 *         has no struct map of type {@code PHYSICAL}
 */
private Map<String, FileXmlElementAccess> getReferenceDivIdsToPhysicalDivisions(Mets mets) {
    // media variants of all non-empty file groups, keyed by their USE
    Map<String, MediaVariant> mediaVariantsByUse;
    FileSec fileSection = mets.getFileSec();
    if (fileSection == null) {
        mediaVariantsByUse = new HashMap<>();
    } else {
        mediaVariantsByUse = fileSection.getFileGrp().parallelStream()
                .filter(group -> !group.getFile().isEmpty())
                .map(UseXmlAttributeAccess::new)
                .collect(Collectors.toMap(
                    use -> use.getMediaVariant().getUse(),
                    UseXmlAttributeAccess::getMediaVariant));
    }

    Map<String, FileXmlElementAccess> accessByDivId = new HashMap<>();
    Optional<StructMapType> physicalStructMap = getStructMapsStreamByType(mets, "PHYSICAL").findFirst();
    if (!physicalStructMap.isPresent()) {
        return accessByDivId;
    }

    DivType rootDiv = physicalStructMap.get().getDiv();
    Map<FileType, String> fileUseByFileCache = createFileUseByFileCache(mets);
    FileXmlElementAccess rootAccess = new FileXmlElementAccess(
        rootDiv, mets, mediaVariantsByUse, fileUseByFileCache);
    PhysicalDivision rootDivision = rootAccess.getPhysicalDivision();
    workpiece.setPhysicalStructure(rootDivision);
    accessByDivId.put(rootDiv.getID(), rootAccess);
    // descend into the children of the root div
    readMediaUnitsTreeRecursive(
        rootDiv, mets, mediaVariantsByUse, rootDivision, accessByDivId, fileUseByFileCache);
    return accessByDivId;
}
/**
 * Transfers the data of the METS header to the workpiece: the creation
 * date, one edit history entry per agent, and the document ID. Does
 * nothing if the METS document has no header.
 *
 * @param mets
 *            METS document whose header is read
 */
private void setWorkpieceFromMetsHeader(Mets mets) {
    MetsHdr header = mets.getMetsHdr();
    if (Objects.isNull(header)) {
        return;
    }

    // fall back to the current time if the header carries no CREATEDATE
    GregorianCalendar creationDate = Objects.nonNull(header.getCREATEDATE())
            ? header.getCREATEDATE().toGregorianCalendar()
            : new GregorianCalendar();
    workpiece.setCreationDate(creationDate);

    header.getAgent().forEach(
        agent -> workpiece.getEditHistory().add(new AgentXmlElementAccess(agent).getProcessingNote()));

    MetsDocumentID documentId = header.getMetsDocumentID();
    if (Objects.nonNull(documentId)) {
        workpiece.setId(documentId.getValue());
    }
}
/**
 * Recursively reads the child {@code <div>}s of a physical {@code <div>}.
 * For every child, an access object is created, its physical division is
 * appended to the children of the parent physical division, and the access
 * object is registered under the ID of the child {@code <div>}.
 *
 * @param div
 *            {@code <div>} whose children are read
 * @param mets
 *            METS document the {@code <div>} belongs to
 * @param useXmlAttributeAccess
 *            media variants by file group use
 * @param physicalDivision
 *            physical division that corresponds to {@code div}; receives
 *            the children
 * @param divIDsToPhysicalDivisions
 *            map to which the access objects are added by {@code <div>} ID
 * @param fileUseByFileCache
 *            map from file to the use of its file group
 */
private void readMediaUnitsTreeRecursive(DivType div, Mets mets, Map<String, MediaVariant> useXmlAttributeAccess,
        PhysicalDivision physicalDivision, Map<String, FileXmlElementAccess> divIDsToPhysicalDivisions,
        Map<FileType, String> fileUseByFileCache) {
    for (DivType childDiv : div.getDiv()) {
        FileXmlElementAccess childAccess = new FileXmlElementAccess(
            childDiv, mets, useXmlAttributeAccess, fileUseByFileCache);
        PhysicalDivision childDivision = childAccess.getPhysicalDivision();
        physicalDivision.getChildren().add(childDivision);
        divIDsToPhysicalDivisions.put(childDiv.getID(), childAccess);
        // depth-first: finish this child's subtree before its next sibling
        readMediaUnitsTreeRecursive(
            childDiv, mets, useXmlAttributeAccess, childDivision, divIDsToPhysicalDivisions, fileUseByFileCache);
    }
}
/**
 * Creates an access object for an existing workpiece. The workpiece is
 * stored as given, without copying it. Within this class, the constructor
 * is used by {@code save} to convert the given workpiece to METS.
 *
 * @param workpiece
 *            workpiece to wrap
 */
private MetsXmlElementAccess(Workpiece workpiece) {
this.workpiece = workpiece;
}
/**
 * The method helps to read {@code <structMap>}s from METS. A
 * {@code <structMap>} without a {@code TYPE} attribute never matches a
 * non-null {@code type}; it does not cause an exception.
 *
 * @param mets
 *            METS that can be read from
 * @param type
 *            type of the {@code <structMap>} to read
 * @return a sequential stream of the {@code <structMap>}s of the given
 *         type, in document order
 */
private static Stream<StructMapType> getStructMapsStreamByType(Mets mets, String type) {
    // TYPE is an optional attribute, so getTYPE() may return null: compare
    // null-safely. The list of struct maps is tiny, a parallel stream would
    // only add overhead.
    return mets.getStructMap().stream().filter(structMap -> Objects.equals(type, structMap.getTYPE()));
}
/**
 * Reads METS from an InputStream. JAXB is used to parse the XML.
 *
 * @param in
 *            InputStream to read from
 * @return the workpiece created from the METS document
 * @throws IOException
 *             if JAXB reports an error. If the cause of the JAXB error is
 *             an {@code IOException}, that exception is thrown as it is,
 *             otherwise the JAXB error is wrapped.
 */
@Override
public Workpiece read(InputStream in) throws IOException {
    try {
        Unmarshaller unmarshaller = JAXBContextCache.getJAXBContext(Mets.class).createUnmarshaller();
        Mets mets = (Mets) unmarshaller.unmarshal(in);
        return new MetsXmlElementAccess(mets).workpiece;
    } catch (JAXBException e) {
        Throwable cause = e.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        throw new IOException(e.getMessage(), e);
    }
}
/**
 * Writes the contents of the given workpiece as a formatted METS file into
 * an output stream.
 *
 * @param workpiece
 *            workpiece to write
 * @param out
 *            writable output stream
 * @throws IOException
 *             if the output device has an error, or if JAXB reports any
 *             other error (which is then wrapped)
 */
@Override
public void save(Workpiece workpiece, OutputStream out) throws IOException {
    try {
        Marshaller marshaller = JAXBContextCache.getJAXBContext(Mets.class).createMarshaller();
        marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
        Mets mets = new MetsXmlElementAccess(workpiece).toMets();
        marshaller.marshal(mets, out);
    } catch (JAXBException e) {
        Throwable cause = e.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        throw new IOException(e.getMessage(), e);
    }
}
/**
 * Generates a METS XML structure from this workpiece in the form of Java
 * objects in the main memory. The sections are generated in the order
 * header, file section, physical struct map, logical struct map, struct
 * link, because each step consumes lookup data filled by a previous one.
 *
 * @return a METS XML structure from this workpiece
 */
private Mets toMets() {
    Mets mets = new Mets();
    mets.setMetsHdr(generateMetsHdr());

    // filled by generateFileSec(), consumed by the physical struct map
    Map<URI, FileType> mediaFilesToIDFiles = new HashMap<>();
    FileSec fileSec = generateFileSec(mediaFilesToIDFiles);
    mets.setFileSec(fileSec);

    // filled by the physical struct map, consumed by the logical one
    Map<PhysicalDivision, String> physicalDivisionIDs = new HashMap<>();
    StructMapType physical = generatePhysicalStructMap(mediaFilesToIDFiles, physicalDivisionIDs, mets);
    mets.getStructMap().add(physical);

    // filled by the logical struct map, consumed by the struct link section
    LinkedList<Pair<String, String>> smLinkData = new LinkedList<>();
    DivType logicalRoot = new DivXmlElementAccess(workpiece.getLogicalStructure())
            .toDiv(physicalDivisionIDs, smLinkData, mets);
    StructMapType logical = new StructMapType();
    logical.setTYPE("LOGICAL");
    logical.setDiv(logicalRoot);
    mets.getStructMap().add(logical);

    mets.setStructLink(createStructLink(smLinkData));
    return mets;
}
/**
 * Creates the header of the METS file. The header area stores the creation
 * time stamp of the workpiece, the current time as last modification time
 * stamp, the ID (if the workpiece has one) and the processing notes.
 *
 * @return the header of the METS file
 */
private MetsHdr generateMetsHdr() {
    MetsHdr header = new MetsHdr();
    header.setCREATEDATE(convertDate(workpiece.getCreationDate()));
    header.setLASTMODDATE(convertDate(new GregorianCalendar()));

    String workpieceId = workpiece.getId();
    if (Objects.nonNull(workpieceId)) {
        MetsDocumentID documentId = new MetsDocumentID();
        documentId.setValue(workpieceId);
        header.setMetsDocumentID(documentId);
    }

    // one agent per processing note, in edit history order
    workpiece.getEditHistory().forEach(
        note -> header.getAgent().add(new AgentXmlElementAccess(note).toAgent()));
    return header;
}
/**
 * Creates an object of class XMLGregorianCalendar. Creating this
 * JAXB-specific class is quite complicated and has therefore been
 * outsourced to a separate method.
 *
 * @param gregorianCalendar
 *            value of the calendar
 * @return an object of class XMLGregorianCalendar
 * @throws NoClassDefFoundError
 *             if no implementation of {@code DatatypeFactory} is available
 *             or it cannot be instantiated. The underlying
 *             {@code DatatypeConfigurationException} is attached as cause.
 */
private static XMLGregorianCalendar convertDate(GregorianCalendar gregorianCalendar) {
    try {
        return DatatypeFactory.newInstance().newXMLGregorianCalendar(gregorianCalendar);
    } catch (DatatypeConfigurationException e) {
        String message = e.getMessage();
        NoClassDefFoundError error = new NoClassDefFoundError(message != null ? message
                : "Implementation of DatatypeFactory not available or cannot be instantiated.");
        // NoClassDefFoundError has no (message, cause) constructor; attach
        // the cause explicitly so that the root problem is not lost
        error.initCause(e);
        throw error;
    }
}
/**
 * Creates the file section. In the file section of a METS file according
 * to the ZVDD DFG Viewer Application Profile, the files are declared in
 * exactly the opposite way as they are managed in Production. That is,
 * there are file groups, each file group accommodating the files of a media
 * variant. Therefore, the media files of the physical divisions are first
 * collected by media variant, then the corresponding XML elements are
 * generated.
 *
 * @param mediaFilesToIDFiles
 *            In this map, for each media file URI, the corresponding XML
 *            file element is added, so that it can be used for linking
 *            later.
 * @return an object of type FileSec
 */
private FileSec generateFileSec(Map<URI, FileType> mediaFilesToIDFiles) {
    Map<UseXmlAttributeAccess, Set<URI>> urisByUse = new HashMap<>();
    Map<Pair<UseXmlAttributeAccess, URI>, String> knownFileIds = new HashMap<>();
    generateFileSecRecursive(workpiece.getPhysicalStructure(), urisByUse, knownFileIds);

    FileSec fileSec = new FileSec();
    urisByUse.forEach((use, uris) -> {
        MediaVariant mediaVariant = use.getMediaVariant();
        String mimeType = mediaVariant.getMimeType();

        // one <file> element per media file URI of this variant
        Map<URI, FileType> filesByUri = uris.parallelStream().collect(Collectors.toMap(
            uri -> uri,
            uri -> new FLocatXmlElementAccess(uri).toFile(mimeType, knownFileIds.get(Pair.of(use, uri)))));
        mediaFilesToIDFiles.putAll(filesByUri);

        FileGrp group = new FileGrp();
        group.setUSE(mediaVariant.getUse());
        group.getFile().addAll(filesByUri.values());
        fileSec.getFileGrp().add(group);
    });
    return fileSec;
}
/**
 * Recursively collects the media files of a physical division and all of
 * its descendants, grouped by media variant. For physical divisions that
 * are a {@code PhysicalDivisionMetsReferrerStorage}, the file ID stored
 * there for the URI is recorded as well.
 *
 * @param physicalDivision
 *            physical division to collect the media files from
 * @param useToPhysicalDivisions
 *            map to which the URIs are added by media variant
 * @param fileIds
 *            map to which the stored file IDs are added
 */
private void generateFileSecRecursive(PhysicalDivision physicalDivision, Map<UseXmlAttributeAccess, Set<URI>> useToPhysicalDivisions,
        Map<Pair<UseXmlAttributeAccess, URI>, String> fileIds) {
    for (Entry<MediaVariant, URI> mediaFile : physicalDivision.getMediaFiles().entrySet()) {
        UseXmlAttributeAccess use = new UseXmlAttributeAccess(mediaFile.getKey());
        URI uri = mediaFile.getValue();
        useToPhysicalDivisions.computeIfAbsent(use, key -> new HashSet<>()).add(uri);
        if (physicalDivision instanceof PhysicalDivisionMetsReferrerStorage) {
            PhysicalDivisionMetsReferrerStorage storage = (PhysicalDivisionMetsReferrerStorage) physicalDivision;
            fileIds.put(Pair.of(use, uri), storage.getFileId(uri));
        }
    }
    physicalDivision.getChildren()
            .forEach(child -> generateFileSecRecursive(child, useToPhysicalDivisions, fileIds));
}
/**
 * Creates the physical struct map. In the physical struct map, the
 * individual files with their variants are enumerated and labeled.
 *
 * @param mediaFilesToIDFiles
 *            A map of the media files to the XML file elements used to
 *            declare them in the file section. To output a link to the ID,
 *            the XML element must be passed to JAXB.
 * @param physicalDivisionIDs
 *            In this map, the function returns the assigned identifier for
 *            each physical division so that the link pairs of the struct
 *            link section can be formed later.
 * @param mets
 *            the METS structure in which the metadata is added
 * @return the physical struct map
 */
private StructMapType generatePhysicalStructMap(
        Map<URI, FileType> mediaFilesToIDFiles, Map<PhysicalDivision, String> physicalDivisionIDs, MetsType mets) {
    DivType rootDiv = generatePhysicalStructMapRecursive(
        workpiece.getPhysicalStructure(), mediaFilesToIDFiles, physicalDivisionIDs, mets);
    StructMapType structMap = new StructMapType();
    structMap.setTYPE("PHYSICAL");
    structMap.setDiv(rootDiv);
    return structMap;
}
/**
 * Recursively converts a physical division and its descendants into
 * {@code <div>} elements. The {@code <div>} of a parent is created before
 * those of its children; the children are appended in their list order.
 *
 * @param physicalDivision
 *            physical division to convert
 * @param mediaFilesToIDFiles
 *            map of the media files to the XML file elements declared in
 *            the file section
 * @param physicalDivisionIDs
 *            map passed on to the conversion of each physical division
 * @param mets
 *            the METS structure in which the metadata is added
 * @return the {@code <div>} for the physical division, including the
 *         {@code <div>}s of all descendants
 */
private DivType generatePhysicalStructMapRecursive(PhysicalDivision physicalDivision, Map<URI, FileType> mediaFilesToIDFiles,
        Map<PhysicalDivision, String> physicalDivisionIDs, MetsType mets) {
    DivType div = new FileXmlElementAccess(physicalDivision).toDiv(mediaFilesToIDFiles, physicalDivisionIDs, mets);
    physicalDivision.getChildren().forEach(child -> div.getDiv()
            .add(generatePhysicalStructMapRecursive(child, mediaFilesToIDFiles, physicalDivisionIDs, mets)));
    return div;
}
/**
 * Creates the struct link section. The struct link section stores which
 * files are attached to which nodes and leaves of the description
 * structure. One {@code <smLink>} is created per pair, in list order, with
 * the left value as {@code from} and the right value as {@code to}.
 *
 * @param smLinkData
 *            The list of related IDs
 * @return the struct link section
 */
private StructLink createStructLink(LinkedList<Pair<String, String>> smLinkData) {
    StructLink structLink = new StructLink();
    for (Pair<String, String> link : smLinkData) {
        SmLink smLink = new SmLink();
        smLink.setFrom(link.getLeft());
        smLink.setTo(link.getRight());
        structLink.getSmLinkOrSmLinkGrp().add(smLink);
    }
    return structLink;
}
/**
 * Builds a lookup table that stores, for each file declared in the file
 * section, the use of the file group that contains it (e.g. MAX, THUMB,
 * LOCAL).
 *
 * @param mets
 *            the mets file
 * @return the map from file to file group use; empty if there is no file
 *         section
 * @throws IllegalArgumentException
 *             if a file is contained in more than one file group
 */
private Map<FileType, String> createFileUseByFileCache(Mets mets) {
    Map<FileType, String> useByFile = new HashMap<>();
    FileSec fileSec = mets.getFileSec();
    if (Objects.isNull(fileSec)) {
        return useByFile;
    }
    for (FileGrp group : fileSec.getFileGrp()) {
        String use = group.getUSE();
        for (FileType file : group.getFile()) {
            // containsKey (not a null check on the value) so that a file in
            // a group without USE is detected as duplicate as well
            if (useByFile.containsKey(file)) {
                throw new IllegalArgumentException(
                    "Corrupt file: file with id " + file.getID() + " is part of multiple groups"
                );
            }
            useByFile.put(file, use);
        }
    }
    return useByFile;
}
}