Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use libxml2 for ASiC parsing #593

Merged
merged 1 commit into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 0 additions & 63 deletions etc/schema/OpenDocument_manifest.xsd

This file was deleted.

2 changes: 1 addition & 1 deletion etc/schema/OpenDocument_manifest_v1_2.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
</xs:complexType>
</xs:element>
<xs:attributeGroup name="manifest-attlist">
<xs:attribute name="version" use="required" form="qualified">
<xs:attribute name="version" form="qualified"><!-- use="required" removed to keep backward compatibility -->
<xs:simpleType>
<xs:restriction base="xs:token">
<xs:enumeration value="1.2"/>
Expand Down
127 changes: 40 additions & 87 deletions src/ASiC_E.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,15 @@
#include "Conf.h"
#include "DataFile_p.h"
#include "SignatureXAdES_LTA.h"
#include "XMLDocument.h"
#include "crypto/Digest.h"
#include "crypto/Signer.h"
#include "util/File.h"
#include "util/log.h"
#include "util/ZipSerialize.h"
#include "xml/OpenDocument_manifest.hxx"
#include "xml/OpenDocument_manifest_v1_2.hxx"
#include "xml/SecureDOMParser.h"

#include <xercesc/util/OutOfMemoryException.hpp>

#include <algorithm>
#include <set>
#include <sstream>

using namespace digidoc;
using namespace digidoc::util;
Expand All @@ -43,7 +40,7 @@ const string_view ASiC_E::ASIC_TM_PROFILE = "time-mark";
const string_view ASiC_E::ASIC_TS_PROFILE = "time-stamp";
const string_view ASiC_E::ASIC_TSA_PROFILE = "time-stamp-archive";
const string_view ASiC_E::ASIC_TMA_PROFILE = "time-mark-archive";
const string ASiC_E::MANIFEST_NAMESPACE = "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0";
constexpr string_view MANIFEST_NS {"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"};

class ASiC_E::Private
{
Expand Down Expand Up @@ -166,7 +163,7 @@ unique_ptr<Container> ASiC_E::openInternal(const string &path)

/**
* Creates BDoc container manifest file and returns its path.
*
*
* Note: If non-ascii characters are present in XML data, we depend on the LANG variable to be set properly
* (see iconv --list for the list of supported encoding values for libiconv).
*
Expand All @@ -177,40 +174,25 @@ unique_ptr<Container> ASiC_E::openInternal(const string &path)
void ASiC_E::createManifest(ostream &os)
{
DEBUG("ASiC_E::createManifest()");

try
{
manifest_1_2::Manifest manifest(manifest_1_2::Manifest::VersionType::cxx_1_2);
manifest.file_entry().push_back(make_unique<manifest_1_2::File_entry>("/", mediaType()));
for(const DataFile *file: dataFiles())
manifest.file_entry().push_back(make_unique<manifest_1_2::File_entry>(file->fileName(), file->mediaType()));

xml_schema::NamespaceInfomap map;
map["manifest"].name = ASiC_E::MANIFEST_NAMESPACE;
manifest_1_2::manifest(os, manifest, map, {}, xml_schema::Flags::dont_initialize);
if(os.fail())
THROW("Failed to create manifest XML");
}
catch(const xercesc::DOMException &e)
{
try {
string result = xsd::cxx::xml::transcode<char>(e.getMessage());
THROW("Failed to create manifest XML file. Error: %s", result.c_str());
} catch(const xsd::cxx::xml::invalid_utf16_string & /* ex */) {
THROW("Failed to create manifest XML file.");
}
}
catch(const xml_schema::Exception &e)
{
THROW("Failed to create manifest XML file. Error: %s", e.what());
}
auto doc = XMLDocument::create("manifest", MANIFEST_NS, "manifest");
doc.setProperty("version", "1.2", MANIFEST_NS);
auto add = [&doc](string_view path, string_view mime) {
auto file = doc.addChild("file-entry", MANIFEST_NS);
file.setProperty("full-path", path, MANIFEST_NS);
file.setProperty("media-type", mime, MANIFEST_NS);
};
add("/", mediaType());
for(const DataFile *file: dataFiles())
add(file->fileName(), file->mediaType());
if(!doc.save(os))
THROW("Failed to create manifest XML");
}

/**
* Parses manifest file and checks that files described in manifest exist, also
* checks that no extra files exist that are not described in manifest.xml.
*
* Note: If non-ascii characters are present in XML data, we depend on the LANG variable to be set properly
* Note: If non-ascii characters are present in XML data, we depend on the LANG variable to be set properly
* (see iconv --list for the list of supported encoding values for libiconv).
*
* @param path directory on disk of the BDOC container.
Expand All @@ -231,45 +213,45 @@ void ASiC_E::parseManifestAndLoadFiles(const ZipSerialize &z)
{
stringstream manifestdata;
z.extract("META-INF/manifest.xml", manifestdata);
xml_schema::Properties p;
p.schema_location(ASiC_E::MANIFEST_NAMESPACE,
File::fullPathUrl(Conf::instance()->xsdPath() + "/OpenDocument_manifest.xsd"));
unique_ptr<xercesc::DOMDocument> doc = SecureDOMParser(p.schema_location(), true).parseIStream(manifestdata);
unique_ptr<manifest::Manifest> manifest = manifest::manifest(*doc, {}, p);

set<string> manifestFiles;
set<string_view> manifestFiles;
bool mimeFound = false;
for(const manifest::File_entry &file: manifest->file_entry())
auto doc = XMLDocument::openStream(manifestdata, "manifest", MANIFEST_NS);
if(!doc.validateSchema(File::path(Conf::instance()->xsdPath(), "OpenDocument_manifest_v1_2.xsd")))
THROW("Failed to parse manifest XML");
for(auto file = doc/"file-entry"; file; file++)
{
DEBUG("full_path = '%s', media_type = '%s'", file.full_path().c_str(), file.media_type().c_str());
auto full_path = file.property("full-path", MANIFEST_NS);
auto media_type = file.property("media-type", MANIFEST_NS);
DEBUG("full_path = '%s', media_type = '%s'", full_path.data(), media_type.data());

if(manifestFiles.find(file.full_path()) != manifestFiles.end())
THROW("Manifest multiple entries defined for file '%s'.", file.full_path().c_str());
if(manifestFiles.find(full_path) != manifestFiles.end())
THROW("Manifest multiple entries defined for file '%s'.", full_path.data());

// ODF does not specify that mimetype should be first in manifest
if(file.full_path() == "/")
if(full_path == "/")
{
if(mediaType() != file.media_type())
THROW("Manifest has incorrect container media type defined '%s', expecting '%s'.", file.media_type().c_str(), mediaType().c_str());
if(mediaType() != media_type)
THROW("Manifest has incorrect container media type defined '%s', expecting '%s'.", media_type.data(), mediaType().c_str());
mimeFound = true;
continue;
}
if(file.full_path().back() == '/') // Skip Directory entries
if(full_path.back() == '/') // Skip Directory entries
continue;

auto fcount = size_t(count(list.cbegin(), list.cend(), file.full_path()));
auto fcount = size_t(count(list.cbegin(), list.cend(), full_path));
if(fcount < 1)
THROW("File described in manifest '%s' does not exist in container.", file.full_path().c_str());
THROW("File described in manifest '%s' does not exist in container.", full_path.data());
if(fcount > 1)
THROW("Found multiple references of file '%s' in zip container.", file.full_path().c_str());
THROW("Found multiple references of file '%s' in zip container.", full_path.data());

manifestFiles.insert(file.full_path());
manifestFiles.insert(full_path);
if(mediaType() == MIMETYPE_ADOC &&
(file.full_path().compare(0, 9, "META-INF/") == 0 ||
file.full_path().compare(0, 9, "metadata/") == 0))
d->metadata.push_back(new DataFilePrivate(dataStream(file.full_path(), z), file.full_path(), file.media_type()));
(full_path.compare(0, 9, "META-INF/") == 0 ||
full_path.compare(0, 9, "metadata/") == 0))
d->metadata.push_back(new DataFilePrivate(dataStream(string(full_path), z), string(full_path), string(media_type)));
else
addDataFilePrivate(dataStream(file.full_path(), z), file.full_path(), file.media_type());
addDataFilePrivate(dataStream(string(full_path), z), string(full_path), string(media_type));
}
if(!mimeFound)
THROW("Manifest is missing mediatype file entry.");
Expand Down Expand Up @@ -307,35 +289,6 @@ void ASiC_E::parseManifestAndLoadFiles(const ZipSerialize &z)
THROW("File '%s' found in container is not described in manifest.", file.c_str());
}
}
catch(const xercesc::DOMException &e)
{
try {
string result = xsd::cxx::xml::transcode<char>(e.getMessage());
THROW("Failed to create manifest XML file. Error: %s", result.c_str());
} catch(const xsd::cxx::xml::invalid_utf16_string & /* ex */) {
THROW("Failed to create manifest XML file.");
}
}
catch(const xsd::cxx::xml::invalid_utf16_string &)
{
THROW("Failed to parse manifest XML: %s", Conf::instance()->xsdPath().c_str());
}
catch(const xsd::cxx::xml::properties<char>::argument & /* e */)
{
THROW("Failed to parse manifest XML: %s", Conf::instance()->xsdPath().c_str());
}
catch(const xsd::cxx::tree::unexpected_element<char> &e)
{
THROW("Failed to parse manifest XML: %s %s %s", Conf::instance()->xsdPath().c_str(), e.expected_name().c_str(), e.encountered_name().c_str());
}
catch(const xml_schema::Exception& e)
{
THROW("Failed to parse manifest XML: %s (xsd path: %s)", e.what(), Conf::instance()->xsdPath().c_str());
}
catch(const xercesc::OutOfMemoryException &)
{
THROW("Failed to parse manifest XML: out of memory");
}
catch(const Exception &e)
{
THROW_CAUSE(e, "Failed to parse manifest");
Expand Down
1 change: 0 additions & 1 deletion src/ASiC_E.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ namespace digidoc
static const std::string_view ASIC_TS_PROFILE;
static const std::string_view ASIC_TMA_PROFILE;
static const std::string_view ASIC_TSA_PROFILE;
static const std::string MANIFEST_NAMESPACE;

~ASiC_E() final;
void save(const std::string &path = {}) final;
Expand Down
8 changes: 4 additions & 4 deletions src/ASiContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ void ASiContainer::addDataFile(const string &path, const string &mediaType)

ZipSerialize::Properties prop { appInfo(), File::modifiedTime(path), File::fileSize(path) };
bool useTempFile = prop.size > MAX_MEM_FILE;
zproperty(File::fileName(path), std::move(prop));
zproperty(fileName, std::move(prop));
unique_ptr<istream> is;
if(useTempFile)
{
Expand All @@ -185,7 +185,7 @@ void ASiContainer::addDataFile(const string &path, const string &mediaType)
*data << file.rdbuf();
is = std::move(data);
}
addDataFilePrivate(std::move(is), fileName, mediaType);
addDataFilePrivate(std::move(is), std::move(fileName), mediaType);
}

void ASiContainer::addDataFile(unique_ptr<istream> is, const string &fileName, const string &mediaType)
Expand All @@ -208,9 +208,9 @@ void ASiContainer::addDataFileChecks(const string &fileName, const string &media
THROW("MediaType does not meet format requirements (RFC2045, section 5.1) '%s'.", mediaType.c_str());
}

void ASiContainer::addDataFilePrivate(unique_ptr<istream> is, const string &fileName, const string &mediaType)
void ASiContainer::addDataFilePrivate(unique_ptr<istream> is, string fileName, string mediaType)
{
d->documents.push_back(new DataFilePrivate(std::move(is), fileName, mediaType));
d->documents.push_back(new DataFilePrivate(std::move(is), std::move(fileName), std::move(mediaType)));
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/ASiContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ namespace digidoc
protected:
ASiContainer(const std::string &mimetype);

void addDataFilePrivate(std::unique_ptr<std::istream> is, const std::string &fileName, const std::string &mediaType);
void addDataFilePrivate(std::unique_ptr<std::istream> is, std::string fileName, std::string mediaType);
Signature* addSignature(std::unique_ptr<Signature> &&signature);
std::unique_ptr<std::iostream> dataStream(const std::string &path, const ZipSerialize &z) const;
std::unique_ptr<ZipSerialize> load(const std::string &path, bool requireMimetype, const std::set<std::string> &supported);
Expand Down
7 changes: 0 additions & 7 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ configure_file( ${CMAKE_SOURCE_DIR}/etc/digidocpp.conf.cmake digidocpp.conf )

set(SCHEMA_DIR ${CMAKE_SOURCE_DIR}/etc/schema)
set(XML_DIR ${CMAKE_CURRENT_BINARY_DIR}/xml)
XSD_SCHEMA( xsd_SRCS IGNORE ${XML_DIR} ${SCHEMA_DIR}/OpenDocument_manifest.xsd
--root-element manifest
--namespace-map urn:oasis:names:tc:opendocument:xmlns:manifest:1.0=digidoc::manifest )
XSD_SCHEMA( xsd_SRCS IGNORE ${XML_DIR} ${SCHEMA_DIR}/OpenDocument_manifest_v1_2.xsd
--root-element manifest
--namespace-map urn:oasis:names:tc:opendocument:xmlns:manifest:1.0=digidoc::manifest_1_2 )
XSD_SCHEMA( xsd_SRCS XML_HEADER ${XML_DIR} ${SCHEMA_DIR}/xmldsig-core-schema.xsd
--root-element-none
--namespace-map http://www.w3.org/2000/09/xmldsig\#=digidoc::dsig
Expand Down Expand Up @@ -82,7 +76,6 @@ file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/tslcerts.h "};\n}")

set( SCHEMA_FILES
${SCHEMA_DIR}/conf.xsd
${SCHEMA_DIR}/OpenDocument_manifest.xsd
${SCHEMA_DIR}/OpenDocument_manifest_v1_2.xsd
${SCHEMA_DIR}/xmldsig-core-schema.xsd
${SCHEMA_DIR}/XAdES01903v132-201601.xsd
Expand Down
Loading