Logo Search packages:      
Sourcecode: libpodofo version File versions  Download package

void PoDoFo::PdfParser::ReadObjectsInternal (  ) [protected]

Reads all objects from the PDF into memory, using the offsets listed in m_offsets.

Requires a correctly set up PdfEncrypt object with the correct password.

This method is called from ReadObjects or SetPassword.

See also:
ReadObjects
SetPassword

Definition at line 891 of file PdfParser.cpp.

References PoDoFo::PdfVecObjects::AddFreeObject(), PoDoFo::PdfError::AddToCallstack(), PoDoFo::PdfVecObjects::begin(), PoDoFo::eLogSeverity_Error, PoDoFo::eLogSeverity_Warning, PoDoFo::PdfVecObjects::end(), PoDoFo::ePdfError_InvalidXRef, PoDoFo::PdfReference::GenerationNumber(), PoDoFo::PdfObject::GetStream(), PoDoFo::PdfObject::HasStream(), PoDoFo::PdfParserObject::HasStreamToParse(), PoDoFo::PdfError::LogMessage(), PoDoFo::PdfReference::ObjectNumber(), PoDoFo::PdfParserObject::ParseFile(), PODOFO_RAISE_ERROR_INFO, PoDoFo::PdfVecObjects::push_back(), ReadObjectFromStream(), PoDoFo::PdfObject::Reference(), PoDoFo::PdfParserObject::SetLoadOnDemand(), PoDoFo::PdfVecObjects::Sort(), and UpdateDocumentVersion().

Referenced by ReadObjects(), and SetPassword().

{
    // Pass 1: walk the xref entries in m_offsets. Every in-use ('n') entry with a
    // non-zero offset is parsed into a PdfParserObject; every other entry (except
    // index 0) is registered with m_vecObjects as a free object.
    for( int i = 0; i < m_nNumObjects; i++ )
    {
        if( m_offsets[i].bParsed && m_offsets[i].cUsed == 'n' && m_offsets[i].lOffset > 0 )
        {
            // pObject is owned by this function until it is either handed to
            // m_vecObjects or deleted; it is reset to NULL at that point so the
            // error handler below never touches an object it no longer owns.
            PdfParserObject* pObject = new PdfParserObject( m_vecObjects, m_device, m_buffer, m_offsets[i].lOffset );
            pObject->SetLoadOnDemand( m_bLoadOnDemand );
            try {
                pObject->ParseFile( m_pEncrypt );
                const int nObjectNumber = static_cast<int>(pObject->Reference().ObjectNumber());

                // final pdf should not contain a linearization dictionary as its contents are invalid
                // as we change some objects and the final xref table
                if( m_pLinearization && nObjectNumber == static_cast<int>(m_pLinearization->Reference().ObjectNumber()) )
                {
                    m_vecObjects->AddFreeObject( pObject->Reference() );
                    delete pObject;
                    pObject = NULL;
                }
                else
                {
                    m_vecObjects->push_back( pObject );
                    pObject = NULL;
                }
            } catch( PdfError & e ) {
                std::ostringstream oss;
                if( pObject )
                {
                    oss << "Error while loading object " << pObject->Reference().ObjectNumber() 
                        << " " << pObject->Reference().GenerationNumber() 
                        << " Offset = " << m_offsets[i].lOffset
                        << " Index = " << i << std::endl;
                    delete pObject;
                    pObject = NULL;
                }

                if( m_bIgnoreBrokenObjects ) 
                {
                    // The message is built at runtime, so pass it as an argument
                    // rather than as the format string itself.
                    PdfError::LogMessage( eLogSeverity_Error, "%s", oss.str().c_str() );
                    m_vecObjects->AddFreeObject( PdfReference( i, 0 ) );
                }
                else
                {
                    e.AddToCallstack( __FILE__, __LINE__, oss.str().c_str() );
                    // Re-throw the in-flight exception (already annotated through
                    // the reference) instead of throwing a copy of it.
                    throw;
                }
            }
        }
        else if( m_offsets[i].bParsed && m_offsets[i].cUsed == 'n' && (m_offsets[i].lOffset == 0)  )
        {
            // There are broken PDFs which add objects with 'n' 
            // and 0 offset and 0 generation number
            // to the xref table instead of using free objects
            // treating them as free objects
            if( m_bStrictParsing ) 
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidXRef,
                                         "Found object with 0 offset which should be 'f' instead of 'n'." );
            }
            else
            {
                // The format string contains %i, so the object index must be supplied.
                PdfError::LogMessage( eLogSeverity_Warning, 
                                      "Treating object %i 0 R as a free object.", i );
                m_vecObjects->AddFreeObject( PdfReference( i, 1LL ) );
            }
        }
// Ulrich Arnold 30.7.2009: the linked free list in the xref section is not always correct in pdf's
//                                        (especially Illustrator) but Acrobat still accepts them. I've seen XRefs 
//                                        where some object-numbers are altogether missing and multiple XRefs where 
//                                        the link list is broken.
//                                        Because PdfVecObjects relies on an unbroken range, fill the free list more
//                                        robustly from all places which are either free or unparsed
//                                        (this replaces an earlier branch that followed the 'f' entries' lOffset links).
        else if( (!m_offsets[i].bParsed || m_offsets[i].cUsed == 'f') && i != 0 )
        {
            m_vecObjects->AddFreeObject( PdfReference( i, 1LL ) ); // TODO: do not hard code generation number
        }
    }

    // Pass 2: all normal objects including object streams are available now,
    // we can parse the object streams safely now.
    //
    // Note that even if demand loading is enabled we still currently read all
    // objects from the stream into memory then free the stream.
    //
    for( int i = 0; i < m_nNumObjects; i++ )
    {
        if( m_offsets[i].bParsed && m_offsets[i].cUsed == 's' ) // we have an object stream
        {
#if defined(PODOFO_VERBOSE_DEBUG)
            if (m_bLoadOnDemand) cerr << "Demand loading on, but can't demand-load from object stream." << endl;
#endif
            ReadObjectFromStream( static_cast<int>(m_offsets[i].lGeneration), 
                                  static_cast<int>(m_offsets[i].lOffset) );
        }
    }

    if( !m_bLoadOnDemand )
    {
        // Pass 3: force loading of streams. We can't do this during the initial
        // run that populates m_vecObjects because a stream might have a /Length
        // key that references an object we haven't yet read. So we must do it here
        // in a second pass, or (if demand loading is enabled) defer it for later.
        for (TCIVecObjects itObjects = m_vecObjects->begin();
             itObjects != m_vecObjects->end();
             ++itObjects)
        {
            PdfParserObject* pParserObject = dynamic_cast<PdfParserObject*>(*itObjects);
            // only parse streams for objects that have not yet parsed
            // their streams
            if( pParserObject && pParserObject->HasStreamToParse() && !pParserObject->HasStream() )
                pParserObject->GetStream();
        }
    }


    // Now sort the list of objects
    m_vecObjects->Sort();

    UpdateDocumentVersion();
}

Here is the call graph for this function:

Here is the caller graph for this function:


Generated by  Doxygen 1.6.0   Back to index