Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use libxml2 for DDoc parsing #595

Merged
merged 1 commit into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ASiC_E.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ void ASiC_E::parseManifestAndLoadFiles(const ZipSerialize &z)

set<string_view> manifestFiles;
bool mimeFound = false;
auto doc = XMLDocument::openStream(manifestdata, "manifest", MANIFEST_NS);
auto doc = XMLDocument::openStream(manifestdata, {"manifest", MANIFEST_NS});
if(!doc.validateSchema(File::path(Conf::instance()->xsdPath(), "OpenDocument_manifest_v1_2.xsd")))
THROW("Failed to parse manifest XML");
for(auto file = doc/"file-entry"; file; file++)
Expand Down
133 changes: 36 additions & 97 deletions src/SiVaContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,35 +27,27 @@
#include "ASiContainer.h"
#include "Conf.h"
#include "DataFile_p.h"
#include "Signature.h"
#include "XMLDocument.h"
#include "crypto/Connect.h"
#include "crypto/Digest.h"
#include "util/File.h"
#include "util/log.h"
#include "util/ZipSerialize.h"
#include "xml/xml.hxx"
#include "xml/SecureDOMParser.h"

#include "json.hpp"

#include <xercesc/dom/DOM.hpp>
#include <xercesc/framework/MemBufFormatTarget.hpp>
#include <xercesc/util/Base64.hpp>

#define XSD_CXX11
#include <xsd/cxx/xml/string.hxx>
#include <xsd/cxx/xml/dom/serialization-source.hxx>

#include <algorithm>
#include <fstream>
#include <sstream>

using namespace digidoc;
using namespace digidoc::util;
using namespace std;
using namespace xercesc;
using json = nlohmann::json;

static auto base64_decode(const XMLCh *in) {
/**
 * Computes the length of the Base64 text produced for n input bytes.
 *
 * Every started group of three input bytes yields four output characters.
 *
 * @param n number of input bytes.
 * @return number of Base64 characters, converted back to T.
 */
template <class T>
constexpr T base64_enc_size(T n) noexcept
{
    // Round the byte count up to whole 3-byte groups.
    const auto groups = (n + 2) / 3;
    // Four characters per group.
    return groups << 2;
}

static auto base64_decode(string_view data) {
static constexpr array<uint8_t, 128> T{
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
Expand All @@ -70,9 +62,8 @@ static auto base64_decode(const XMLCh *in) {
auto out = make_unique<stringstream>();
int value = 0;
int bits = -8;
for(; in; ++in)
for(auto c: data)
{
const char c(*in);
if(c == '\r' || c == '\n' || c == ' ')
continue;
uint8_t check = T[c];
Expand Down Expand Up @@ -197,7 +188,7 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
if(useHashCode && cb && !cb->validateOnline())
THROW("Online validation disabled");

array<XMLByte, 4800> buf{};
array<unsigned char, 4800> buf{};
string b64;
is->clear();
is->seekg(0);
Expand All @@ -207,11 +198,10 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
if(is->gcount() <= 0)
break;

XMLSize_t size = 0;
XMLByte *out = Base64::encode(buf.data(), XMLSize_t(is->gcount()), &size);
if(out)
b64.append((char*)out, size);
delete out;
size_t pos = b64.size();
b64.resize(b64.size() + base64_enc_size(buf.size()));
int size = EVP_EncodeBlock((unsigned char*)&b64[pos], buf.data(), int(is->gcount()));
b64.resize(pos + size);
}
ifs.reset();

Expand Down Expand Up @@ -257,13 +247,7 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
s->_tsTime = info.value<string>("timestampCreationTime", {});
s->_ocspTime = info.value<string>("ocspResponseCreationTime", {});
if(info.contains("timeAssertionMessageImprint"))
{
string base64 = info["timeAssertionMessageImprint"];
XMLSize_t size = 0;
XMLByte *message = Base64::decode((const XMLByte*)base64.c_str(), &size);
s->_messageImprint.assign(message, message + size);
delete message;
}
s->_messageImprint = from_base64(info["timeAssertionMessageImprint"].get<string_view>());
for(const json &signerRole: info.value<json>("signerRole", {}))
s->_signerRoles.push_back(signerRole["claimedRole"]);
if(json signatureProductionPlace = info.value<json>("signatureProductionPlace", {}); !signatureProductionPlace.is_null())
Expand All @@ -276,17 +260,15 @@ SiVaContainer::SiVaContainer(const string &path, ContainerOpenCB *cb, bool useHa
}
for(const json &certificate: signature.value<json>("certificates", {}))
{
XMLSize_t size = 0;
XMLByte *der = Base64::decode((const XMLByte*)certificate.value<string_view>("content", {}).data(), &size);
auto der = from_base64(certificate.value<string_view>("content", {}));
if(certificate["type"] == "SIGNING")
s->_signingCertificate = X509Cert(der, size, X509Cert::Der);
s->_signingCertificate = X509Cert(der, X509Cert::Der);
if(certificate["type"] == "REVOCATION")
s->_ocspCertificate = X509Cert(der, size, X509Cert::Der);
s->_ocspCertificate = X509Cert(der, X509Cert::Der);
if(certificate["type"] == "SIGNATURE_TIMESTAMP")
s->_tsCertificate = X509Cert(der, size, X509Cert::Der);
s->_tsCertificate = X509Cert(der, X509Cert::Der);
if(certificate["type"] == "ARCHIVE_TIMESTAMP")
s->_tsaCertificate = X509Cert(der, size, X509Cert::Der);
delete der;
s->_tsaCertificate = X509Cert(der, X509Cert::Der);
}
for(const json &error: signature.value<json>("errors", {}))
{
Expand Down Expand Up @@ -363,76 +345,33 @@ unique_ptr<Container> SiVaContainer::openInternal(const string &path, ContainerO

unique_ptr<istream> SiVaContainer::parseDDoc(bool useHashCode)
{
namespace xml = xsd::cxx::xml;
try
{
unique_ptr<DOMDocument> dom(SecureDOMParser().parseIStream(*d->ddoc));
DOMNodeList *nodeList = dom->getElementsByTagName(u"DataFile");
for(XMLSize_t i = 0; i < nodeList->getLength(); ++i)
auto doc = XMLDocument::openStream(*d->ddoc, {}, true);
for(auto dataFile = doc/"DataFile"; dataFile; dataFile++)
{
auto *item = static_cast<DOMElement*>(nodeList->item(i));
if(!item)
continue;

if(XMLString::compareString(item->getAttribute(u"ContentType"), u"HASHCODE") == 0)
auto contentType = dataFile.property("ContentType");
if(contentType == "HASHCODE")
THROW("Currently supports only content types EMBEDDED_BASE64 for DDOC format");
if(XMLString::compareString(item->getAttribute(u"ContentType"), u"EMBEDDED_BASE64") != 0)
if(contentType != "EMBEDDED_BASE64")
continue;

if(const XMLCh *b64 = item->getTextContent())
{
d->dataFiles.push_back(new DataFilePrivate(base64_decode(b64),
xml::transcode<char>(item->getAttribute(u"Filename")),
xml::transcode<char>(item->getAttribute(u"MimeType")),
xml::transcode<char>(item->getAttribute(u"Id"))));
}

d->dataFiles.push_back(new DataFilePrivate(base64_decode(dataFile),
string(dataFile.property("Filename")),
string(dataFile.property("MimeType")),
string(dataFile.property("Id"))));
if(!useHashCode)
continue;
Digest calc(URI_SHA1);
SecureDOMParser::calcDigestOnNode(&calc, "http://www.w3.org/TR/2001/REC-xml-c14n-20010315", item);
vector<unsigned char> digest = calc.result();
if(XMLSize_t size = 0; XMLByte *out = Base64::encode(digest.data(), XMLSize_t(digest.size()), &size))
{
item->setAttribute(u"ContentType", u"HASHCODE");
item->setAttribute(u"DigestType", u"sha1");
xml::string outXMLCh(reinterpret_cast<const char*>(out));
item->setAttribute(u"DigestValue", outXMLCh.c_str());
item->setTextContent(nullptr);
delete out;
}
doc.c14n(&calc, XMLDocument::C14D_ID_1_0, dataFile);
dataFile.setProperty("ContentType", "HASHCODE");
dataFile.setProperty("DigestType", "sha1");
dataFile.setProperty("DigestValue", to_base64(calc.result()));
dataFile = std::string_view{};
}

DOMImplementation *pImplement = DOMImplementationRegistry::getDOMImplementation(u"LS");
unique_ptr<DOMLSOutput> pDomLsOutput(pImplement->createLSOutput());
unique_ptr<DOMLSSerializer> pSerializer(pImplement->createLSSerializer());
auto result = make_unique<stringstream>();
xml::dom::ostream_format_target out(*result);
pDomLsOutput->setByteStream(&out);
pSerializer->setNewLine(u"\n");
pSerializer->write(dom.get(), pDomLsOutput.get());
doc.save(*result);
return result;
}
catch(const XMLException& e)
{
try {
string result = xml::transcode<char>(e.getMessage());
THROW("Failed to parse DDoc XML: %s", result.c_str());
} catch(const xml::invalid_utf16_string & /* ex */) {
THROW("Failed to parse DDoc XML.");
}
}
catch(const DOMException& e)
{
try {
string result = xml::transcode<char>(e.getMessage());
THROW("Failed to parse DDoc XML: %s", result.c_str());
} catch(const xml::invalid_utf16_string & /* ex */) {
THROW("Failed to parse DDoc XML.");
}
} catch(const xml::invalid_utf16_string & /* ex */) {
THROW("Failed to parse DDoc XML.");
}
catch(const Exception &)
{
throw;
Expand Down
122 changes: 116 additions & 6 deletions src/XMLDocument.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@

#pragma once

#include "crypto/Digest.h"
#include "util/log.h"

#include <libxml/parser.h>
#include <libxml/xmlschemas.h>
#include <libxml/c14n.h> // needs to be last to workaround old libxml2 errors

#include <openssl/evp.h>

#include <memory>
#include <istream>
Expand All @@ -46,6 +50,54 @@ constexpr std::unique_ptr<T, D> make_unique_ptr(T *p, D d) noexcept
return {p, d};
}

/**
 * Decodes Base64 text into raw bytes using the OpenSSL EVP decoder.
 *
 * The input is cut at every whitespace character (space, LF, CR, FF, TAB, VT)
 * and each piece is passed to EVP_DecodeUpdate in turn.
 *
 * @param data Base64 encoded text.
 * @return decoded bytes; an empty vector when EVP_DecodeUpdate reports a
 *         negative result or EVP_DecodeFinal does not return 1.
 */
static std::vector<unsigned char> from_base64(std::string_view data)
{
    static constexpr std::string_view whitespace {" \n\r\f\t\v"};
    // Output buffer sized with OpenSSL's own upper bound for the input length.
    std::vector<unsigned char> decoded(EVP_DECODE_LENGTH(data.size()), 0);
    auto decoder = make_unique_ptr(EVP_ENCODE_CTX_new(), EVP_ENCODE_CTX_free);
    EVP_DecodeInit(decoder.get());

    size_t written = 0; // bytes already stored in decoded
    int produced = 0;   // bytes emitted by the most recent EVP call
    while(!data.empty())
    {
        // Take everything up to the next whitespace character and drop the
        // separator itself from the remaining input.
        const auto split = data.find_first_of(whitespace);
        const auto chunk = data.substr(0, split);
        if(split == std::string_view::npos)
            data = {};
        else
            data.remove_prefix(split + 1);

        if(EVP_DecodeUpdate(decoder.get(), &decoded[written], &produced,
            (const unsigned char*)chunk.data(), int(chunk.size())) < 0)
        {
            decoded.clear();
            return decoded;
        }
        written += size_t(produced);
    }

    // Flush whatever the decoder still holds and trim the buffer to the real size.
    if(EVP_DecodeFinal(decoder.get(), &decoded[written], &produced) != 1)
        decoded.clear();
    else
        decoded.resize(written + size_t(produced));
    return decoded;
}

/**
 * Encodes raw bytes as Base64 text using the OpenSSL EVP encoder.
 *
 * NOTE(review): per the OpenSSL documentation the EVP_EncodeUpdate /
 * EVP_EncodeFinal pair inserts line feeds into its output - confirm that
 * callers expect them.
 *
 * @param data bytes to encode.
 * @return encoded text; an empty string when EVP_EncodeUpdate returns a value
 *         below 1.
 */
static std::string to_base64(const std::vector<unsigned char> &data)
{
    // Buffer sized with OpenSSL's own upper bound for the input length.
    std::string encoded(EVP_ENCODE_LENGTH(data.size()), 0);
    auto *out = (unsigned char*)encoded.data();
    auto encoder = make_unique_ptr(EVP_ENCODE_CTX_new(), EVP_ENCODE_CTX_free);
    EVP_EncodeInit(encoder.get());

    int produced{};
    if(EVP_EncodeUpdate(encoder.get(), out, &produced, data.data(), int(data.size())) >= 1)
    {
        // Append the final (possibly padded) block right after the bulk output
        // and trim the buffer to what was actually written.
        const auto head = size_t(produced);
        EVP_EncodeFinal(encoder.get(), out + head, &produced);
        encoded.resize(head + size_t(produced));
    }
    else
        encoded.clear();
    return encoded;
}

template<class T>
struct XMLElem
{
Expand Down Expand Up @@ -185,34 +237,49 @@ struct XMLNode: public XMLElem<xmlNode>
}
};

/**
 * Pair of views naming an XML element: its local name and its namespace.
 * Both members are empty by default. The struct only stores views; it does
 * not own the character data.
 */
struct XMLName
{
    /// Element name.
    std::string_view name {};
    /// Namespace the element belongs to.
    std::string_view ns {};
};

struct XMLDocument: public unique_xml_t<decltype(xmlFreeDoc)>, public XMLNode
{
static constexpr std::string_view C14D_ID_1_0 {"http://www.w3.org/TR/2001/REC-xml-c14n-20010315"};
static constexpr std::string_view C14D_ID_1_0_COM {"http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"};
static constexpr std::string_view C14D_ID_1_1 {"http://www.w3.org/2006/12/xml-c14n11"};
static constexpr std::string_view C14D_ID_1_1_COM {"http://www.w3.org/2006/12/xml-c14n11#WithComments"};
static constexpr std::string_view C14D_ID_1_0_EXC {"http://www.w3.org/2001/10/xml-exc-c14n#"};
static constexpr std::string_view C14D_ID_1_0_EXC_COM {"http://www.w3.org/2001/10/xml-exc-c14n#WithComments"};

using XMLNode::operator bool;

XMLDocument(element_type *ptr, std::string_view _name = {}, std::string_view _ns = {}) noexcept
XMLDocument(element_type *ptr, const XMLName &n = {}) noexcept
: std::unique_ptr<element_type, deleter_type>(ptr, xmlFreeDoc)
, XMLNode{xmlDocGetRootElement(get())}
{
if(d && !_name.empty() && _name != name() && !_ns.empty() && _ns != ns())
if(d && !n.name.empty() && n.name != name() && !n.ns.empty() && n.ns != ns())
d = {};
}

XMLDocument(std::string_view path, std::string_view name = {}) noexcept
: XMLDocument(xmlParseFile(path.data()), name)
XMLDocument(std::string_view path, const XMLName &n = {}) noexcept
: XMLDocument(xmlParseFile(path.data()), n)
{}

static XMLDocument openStream(std::istream &is, std::string_view name = {}, std::string_view ns = {})
static XMLDocument openStream(std::istream &is, const XMLName &name = {}, bool hugeFile = false)
{
auto ctxt = make_unique_ptr(xmlCreateIOParserCtxt(nullptr, nullptr, [](void *context, char *buffer, int len) -> int {
auto *is = static_cast<std::istream *>(context);
is->read(buffer, len);
return is->good() || is->eof() ? int(is->gcount()) : -1;
}, nullptr, &is, XML_CHAR_ENCODING_NONE), xmlFreeParserCtxt);
ctxt->linenumbers = 1;
if(hugeFile)
ctxt->options = XML_PARSE_HUGE;
auto result = xmlParseDocument(ctxt.get());
if(result != 0 || !ctxt->wellFormed)
THROW("%s", ctxt->lastError.message);
return {ctxt->myDoc, name, ns};
return {ctxt->myDoc, name};
}

static XMLDocument create(std::string_view name = {}, std::string_view href = {}, std::string_view prefix = {}) noexcept
Expand All @@ -228,6 +295,49 @@ struct XMLDocument: public unique_xml_t<decltype(xmlFreeDoc)>, public XMLNode
return doc;
}

/**
 * Canonicalizes the subtree rooted at node and streams the canonical bytes
 * into digest.
 *
 * @param digest receives the canonical output through Digest::update.
 * @param algo canonicalization algorithm URI; one of the C14D_ID_* constants.
 *        An empty value selects canonical XML 1.0 without comments.
 * @param node root of the subtree to canonicalize; only this node and its
 *        descendants are emitted.
 * @throws Exception (via THROW) when algo is not supported or when
 *         xmlC14NExecute reports a failure.
 */
void c14n(Digest *digest, std::string_view algo, XMLNode node)
{
    xmlC14NMode mode = XML_C14N_1_0;
    int with_comments = 0;
    if(algo == C14D_ID_1_0)
        mode = XML_C14N_1_0;
    else if(algo == C14D_ID_1_0_COM)
        with_comments = 1;
    else if(algo == C14D_ID_1_1)
        mode = XML_C14N_1_1;
    else if(algo == C14D_ID_1_1_COM)
    {
        mode = XML_C14N_1_1;
        with_comments = 1;
    }
    else if(algo == C14D_ID_1_0_EXC)
        mode = XML_C14N_EXCLUSIVE_1_0;
    else if(algo == C14D_ID_1_0_EXC_COM)
    {
        mode = XML_C14N_EXCLUSIVE_1_0;
        with_comments = 1;
    }
    else if(!algo.empty())
        THROW("Unsupported canonicalization method '%.*s'", int(algo.size()), algo.data());

    // xmlC14NExecute only flushes the output buffer; releasing it is the
    // caller's job. Own it here so it is closed on every exit path instead of
    // being leaked on each call.
    auto buf = make_unique_ptr(xmlOutputBufferCreateIO([](void *context, const char *buffer, int len) {
        auto *digest = static_cast<Digest *>(context);
        digest->update(pcxmlChar(buffer), size_t(len));
        return len;
    }, nullptr, digest, nullptr), xmlOutputBufferClose);

    // Visibility callback: a node is emitted when it is the requested root or
    // has the requested root among its ancestors.
    int size = xmlC14NExecute(get(), [](void *root, xmlNodePtr node, xmlNodePtr parent) constexpr noexcept {
        if(root == node)
            return 1;
        for(; parent; parent = parent->parent)
        {
            if(root == parent)
                return 1;
        }
        return 0;
    }, node.d, mode, nullptr, with_comments, buf.get());
    if(size < 0)
        THROW("Failed to canonicalizate input");
}

bool save(std::string_view path) const noexcept
{
return xmlSaveFormatFileEnc(path.data(), get(), "UTF-8", 1) > 0;
Expand Down
Loading