Skip to content

Commit

Permalink
#261: recursion in page tree parsing. avoid with storing the current …
Browse files Browse the repository at this point in the history
…path and identifying cycles
  • Loading branch information
galkahana committed Jun 22, 2024
1 parent f41c1bd commit 516a6e7
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 4 deletions.
4 changes: 4 additions & 0 deletions PDFWriter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ PDFPageMergingHelper.cpp
PDFParser.cpp
PDFParserTokenizer.cpp
PDFParsingOptions.cpp
PDFParsingPath.cpp
PDFReal.cpp
PDFRectangle.cpp
PDFStream.cpp
Expand Down Expand Up @@ -313,6 +314,7 @@ PDFPageMergingHelper.h
PDFParser.h
PDFParserTokenizer.h
PDFParsingOptions.h
PDFParsingPath.h
PDFReal.h
PDFRectangle.h
PDFStream.h
Expand Down Expand Up @@ -720,6 +722,8 @@ PDFNull.h
PDFObject.cpp
PDFObject.h
PDFObjectCast.h
PDFParsingPath.cpp
PDFParsingPath.h
PDFReal.cpp
PDFReal.h
PDFStreamInput.cpp
Expand Down
26 changes: 23 additions & 3 deletions PDFWriter/PDFParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,13 +875,19 @@ EStatusCode PDFParser::ParsePagesObjectIDs()
EStatusCode PDFParser::ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNodeObjectID)
{
unsigned long currentPageIndex = 0;
PDFParsingPath parsingPath;

return ParsePagesIDs(inPageNode,inNodeObjectID,currentPageIndex);
return ParsePagesIDs(inPageNode, inNodeObjectID, currentPageIndex, parsingPath);
}

static const std::string scPage = "Page";
static const std::string scPages = "Pages";
EStatusCode PDFParser::ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNodeObjectID,unsigned long& ioCurrentPageIndex)
EStatusCode PDFParser::ParsePagesIDs(
PDFDictionary* inPageNode,
ObjectIDType inNodeObjectID,
unsigned long& ioCurrentPageIndex,
PDFParsingPath& ioParsingPath
)
{
// recursion.
// if this is a page, write its node object ID in the current page index and +1
Expand All @@ -891,6 +897,12 @@ EStatusCode PDFParser::ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNo

do
{
// add object to parsing path, checking for cycles
if(ioParsingPath.EnterObject(inNodeObjectID) != eSuccess) {
status = PDFHummus::eFailure;
break;
}

PDFObjectCastPtr<PDFName> objectType(inPageNode->QueryDirectObject("Type"));
if(!objectType)
{
Expand Down Expand Up @@ -952,7 +964,7 @@ EStatusCode PDFParser::ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNo
break;
}

status = ParsePagesIDs(pageNodeObject.GetPtr(),((PDFIndirectObjectReference*)it.GetItem())->mObjectID,ioCurrentPageIndex);
status = ParsePagesIDs(pageNodeObject.GetPtr(),((PDFIndirectObjectReference*)it.GetItem())->mObjectID, ioCurrentPageIndex, ioParsingPath);
}
}
else
Expand All @@ -961,6 +973,14 @@ EStatusCode PDFParser::ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNo
status = PDFHummus::eFailure;
break;
}


// exit object
if(ioParsingPath.ExitObject(inNodeObjectID) != eSuccess) {
status = PDFHummus::eFailure;
break;
}

}while(false);

return status;
Expand Down
3 changes: 2 additions & 1 deletion PDFWriter/PDFParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "DecryptionHelper.h"
#include "PDFParsingOptions.h"
#include "InputOffsetStream.h"
#include "PDFParsingPath.h"

#include <map>
#include <set>
Expand Down Expand Up @@ -214,7 +215,7 @@ class PDFParser
PDFHummus::EStatusCode SetupDecryptionHelper(const std::string& inPassword);
PDFHummus::EStatusCode ParsePagesObjectIDs();
PDFHummus::EStatusCode ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNodeObjectID);
PDFHummus::EStatusCode ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNodeObjectID,unsigned long& ioCurrentPageIndex);
PDFHummus::EStatusCode ParsePagesIDs(PDFDictionary* inPageNode,ObjectIDType inNodeObjectID,unsigned long& ioCurrentPageIndex, PDFParsingPath& ioParsingPath);
PDFHummus::EStatusCode ParsePreviousXrefs(PDFDictionary* inTrailer);
PDFHummus::EStatusCode MergeXrefWithMainXref(XrefEntryInputVector& inTableToMerge,ObjectIDType inMergedTableSize);
PDFHummus::EStatusCode ParseFileDirectory();
Expand Down
75 changes: 75 additions & 0 deletions PDFWriter/PDFParsingPath.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@

/*
Source File : PDFParsingPath.cpp
Copyright 2011 Gal Kahana PDFWriter
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#include "PDFParsingPath.h"
#include "Trace.h"

#include <algorithm>
#include <sstream>


using namespace std;
using namespace PDFHummus;

// Creates a parsing path with no objects on it.
PDFParsingPath::PDFParsingPath() : mObjectsPath()
{
}


// Appends inObjectId to the end of the current parsing path.
//
// Returns eFailure, leaving the path untouched and writing the current path to the trace log,
// when inObjectId is already somewhere on the path - entering it again would mean that
// the traversal is walking in a cycle. Returns eSuccess otherwise.
EStatusCode PDFParsingPath::EnterObject(ObjectIDType inObjectId) {
	// linear scan over the ids entered so far.
	// std::find comes from <algorithm>; qualified so it does not depend on a using directive
	const bool alreadyOnPath =
		std::find(mObjectsPath.begin(), mObjectsPath.end(), inObjectId) != mObjectsPath.end();

	if(alreadyOnPath) {
		// NOTE(review): %ld presumes ObjectIDType is printable as a long - confirm against ObjectsBasicTypes.h
		TRACE_LOG2("PDFParsingPath::EnterObject, attempting to enter object %ld, where the object already exists in the current path: %s",inObjectId,PrintPath().c_str());
		return eFailure;
	}

	mObjectsPath.push_back(inObjectId);
	return eSuccess;
}

// Removes inObjectId from the end of the current parsing path.
//
// Objects must be exited in the reverse order of entering them: the call succeeds only
// when inObjectId is the most recently entered object. If the path is empty, or its last
// entry is a different id, the path is left as is, the current path is written to the
// trace log and eFailure is returned.
EStatusCode PDFParsingPath::ExitObject(ObjectIDType inObjectId) {
	// emptiness is checked first so that back() is never called on an empty list
	const bool isLastEntered = !mObjectsPath.empty() && mObjectsPath.back() == inObjectId;

	if(isLastEntered) {
		mObjectsPath.pop_back();
		return eSuccess;
	}

	TRACE_LOG2("PDFParsingPath::ExitObject, attempting to exit object %ld, where the object is NOT the last entered: %s",inObjectId,PrintPath().c_str());
	return eFailure;
}

// Drops every object id from the path, bringing it back to the empty state.
void PDFParsingPath::Reset()
{
	mObjectsPath.clear();
}

// Builds a printable form of the current path for trace messages: the object ids in the
// order they were entered, separated by ", ". An empty path yields an empty string.
std::string PDFParsingPath::PrintPath() {
	std::stringstream pathWriter;
	ObjectIDTypeList::iterator it = mObjectsPath.begin();

	if(it != mObjectsPath.end()) {
		// the first id goes out without a separator...
		pathWriter<<*it;
		// ...so the loop has to start from the second one, or the first id is printed twice
		for(++it; it != mObjectsPath.end(); ++it) {
			pathWriter<<", "<<*it;
		}
	}

	return pathWriter.str();
}
47 changes: 47 additions & 0 deletions PDFWriter/PDFParsingPath.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
Source File : PDFParsingPath.h
Copyright 2011 Gal Kahana PDFWriter
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once

#include "EStatusCode.h"
#include "ObjectsBasicTypes.h"

#include <list>
#include <string>


typedef std::list<ObjectIDType> ObjectIDTypeList;


// PDFParsingPath records the chain of object ids that a recursive parse is currently
// inside of, so that a reference leading back to an object that is already being
// parsed (a cycle) can be detected instead of recursing on it again.
class PDFParsingPath
{
public:
	PDFParsingPath();

	// Adds inObjectId to the end of the path. Fails if the id is already on the path.
	PDFHummus::EStatusCode EnterObject(ObjectIDType inObjectId);

	// Removes inObjectId from the end of the path. Fails if it is not the last entered id.
	PDFHummus::EStatusCode ExitObject(ObjectIDType inObjectId);

	// Empties the path.
	void Reset();

private:
	// Textual, comma separated, form of the path. Used for trace messages.
	std::string PrintPath();

	// ids of the objects entered and not yet exited, in order of entrance
	ObjectIDTypeList mObjectsPath;
};
10 changes: 10 additions & 0 deletions PDFWriterTesting/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ create_test_sourcelist (Tests
PDFEmbedTest.cpp
PDFObjectCastTest.cpp
PDFObjectParserTest.cpp
PDFParserFuzzSanity.cpp
PDFParserTest.cpp
PDFWithPassword.cpp
PFBStreamTest.cpp
Expand Down Expand Up @@ -102,12 +103,21 @@ endif(APPLE)
# Add all the ADD_TEST for each test (reusing the create_test_sourcelist list minus the generated executable)
set (TestsToRun ${Tests})
list(REMOVE_AT TestsToRun 0) # removing first item which is PDFWriterTesting. started getting a full path for it, so moved to REMOVE_AT instead of REMOVE_ITEM with the file name
list(REMOVE_ITEM TestsToRun PDFParserFuzzSanity.cpp) # will add tests for this specifically

foreach (test ${TestsToRun})
get_filename_component (TName ${test} NAME_WE)
add_test (NAME ${TName} COMMAND PDFWriterTesting ${TName} ${CMAKE_CURRENT_SOURCE_DIR}/Materials ${CMAKE_BINARY_DIR}/Testing/Output)
endforeach ()

# fuzz test specific
# Each entry matched under Materials/fuzzing is registered as a separate test named
# FuzzTest_<file name>. All of them run the PDFParserFuzzSanity entry of the PDFWriterTesting
# driver, with the usual Materials and Output folders followed by the matched path as
# an extra, last, argument.
file(GLOB fuzztestfiles ${CMAKE_CURRENT_SOURCE_DIR}/Materials/fuzzing/*)
foreach (fuzztestfile ${fuzztestfiles})
# NAME keeps the extension, so files that differ only by extension still get distinct test names
get_filename_component (TName ${fuzztestfile} NAME)
add_test (NAME FuzzTest_${TName} COMMAND PDFWriterTesting PDFParserFuzzSanity ${CMAKE_CURRENT_SOURCE_DIR}/Materials ${CMAKE_BINARY_DIR}/Testing/Output ${fuzztestfile})
endforeach ()


# create temp dir for output files
set (TmpOutputDir ${CMAKE_BINARY_DIR}/Testing/Output)

Expand Down
Binary file not shown.
32 changes: 32 additions & 0 deletions PDFWriterTesting/PDFParserFuzzSanity.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include "PDFParser.h"
#include "InputFile.h"
#include "Trace.h"

#include "testing/TestIO.h"

#include <iostream>

using namespace std;
using namespace PDFHummus;

// Sanity run of the PDF parser over a single (possibly malformed) input file.
//
// The file to parse is taken from argv[3]. The test only verifies that parsing returns
// at all (no crash, no endless recursion) - the parsing status itself is ignored.
//
// Returns 1 when the file argument is missing or the file cannot be opened, otherwise
// the status of opening the file (which is success once parsing is reached).
int PDFParserFuzzSanity(int argc, char* argv[]) {
	// argv[3] is read below, make sure that it was actually provided
	if(argc < 4)
	{
		cout<<"missing argument, expecting the path of the file to parse as argument 3\n";
		return 1;
	}

	PDFParser parser;
	InputFile pdfFile;

	std::string path = argv[3];

	EStatusCode status = pdfFile.OpenFile(path);
	if(status != PDFHummus::eSuccess)
	{
		cout<<"unable to open file for reading, Path:"<<path.c_str()<<"\n";
		return 1;
	}

	// traces on
	Trace::DefaultTrace().SetLogSettings(BuildRelativeOutputPath(argv, "PDFParserFuzzSanity.txt"), true, true);

	// checks that returns, status doesn't matter
	parser.StartPDFParsing(pdfFile.GetInputStream());

	return status;
}

0 comments on commit 516a6e7

Please sign in to comment.