Logo Search packages:      
Sourcecode: libpodofo version File versions  Download package

void PoDoFo::PdfParser::HasLinearizationDict (  ) [protected]

Checks wether this pdf is linearized or not. Initializes the linearization directory on sucess.

Definition at line 329 of file PdfParser.cpp.

References PoDoFo::PdfInputDevice::Clear(), PoDoFo::PdfRefCountedInputDevice::Device(), PoDoFo::eLogSeverity_Warning, PoDoFo::ePdfError_InternalLogic, PoDoFo::ePdfError_InvalidLinearization, PoDoFo::ePdfVersion_1_5, PoDoFo::PdfRefCountedBuffer::GetBuffer(), PoDoFo::PdfVariant::GetDictionary(), PoDoFo::PdfRefCountedBuffer::GetSize(), PoDoFo::PdfDictionary::HasKey(), PoDoFo::PdfVariant::IsDictionary(), PoDoFo::PdfTokenizer::IsWhitespace(), PoDoFo::PdfError::LogMessage(), ParseFile(), PODOFO_RAISE_ERROR, PODOFO_RAISE_ERROR_INFO, PoDoFo::PdfInputDevice::Read(), PoDoFo::PdfInputDevice::Seek(), PoDoFo::PdfInputDevice::Tell(), and PoDoFo::PdfError::what().

{
    if (m_pLinearization)
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InternalLogic,
                "HasLinarizationDict() called twice on one object");
    }

    m_device.Device()->Seek( 0 );

    // The linearization dictionary must be in the first 1024 
    // bytes of the PDF, our buffer might be larger so.
    // Therefore read only the first 1024 byte.
    // Normally we should jump to the end of the file, to determine
    // it's filesize and read the min(1024, filesize) to not fail
    // on smaller files, but jumping to the end is against the idea
    // of linearized PDF. Therefore just check if we read anything.
    const std::streamoff MAX_READ = 1024;
    PdfRefCountedBuffer linearizeBuffer( MAX_READ );

    std::streamoff size = m_device.Device()->Read( linearizeBuffer.GetBuffer(), 
                                                   linearizeBuffer.GetSize() );
    // Only fail if we read nothing, to allow files smaller than MAX_READ
    if( static_cast<size_t>(size) <= 0 )
    {
        // Clear the error state from the bad read
        m_device.Device()->Clear();
        return; // Ignore Error Code: ERROR_PDF_NO_TRAILER;
    }

    char * pszObj = strstr( m_buffer.GetBuffer(), "obj" );
    if( !pszObj )
        // strange that there is no obj in the first 1024 bytes,
        // but ignore it
        return;
    
    --pszObj; // *pszObj == 'o', so the while would fail without decrement
    while( *pszObj && (PdfTokenizer::IsWhitespace( *pszObj ) || (*pszObj >= '0' && *pszObj <= '9')) )
        --pszObj;

    m_pLinearization = new PdfParserObject( m_vecObjects, m_device, linearizeBuffer,
                                            pszObj - linearizeBuffer.GetBuffer() + 2 );

    try {
        // Do not care for encryption here, as the linearization dictionary does not contain strings or streams
        // ... hint streams do, but we do not load the hintstream.
        static_cast<PdfParserObject*>(m_pLinearization)->ParseFile( NULL );
        if (! (m_pLinearization->IsDictionary() &&
               m_pLinearization->GetDictionary().HasKey( "Linearized" ) ) )
        {
            delete m_pLinearization;
            m_pLinearization = NULL;
            return;
        }
    } catch( PdfError & e ) {
        PdfError::LogMessage( eLogSeverity_Warning, e.what() );
        delete m_pLinearization;
        m_pLinearization = NULL;
        return;
    }
    
    long long      lXRef      = -1;
    lXRef = m_pLinearization->GetDictionary().GetKeyAsLong( "T", lXRef );
    if( lXRef == -1 )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidLinearization );
    }

    // avoid moving to a negative file position here
    m_device.Device()->Seek( (static_cast<pdf_long>(lXRef-PDF_XREF_BUF) > 0 ? static_cast<pdf_long>(lXRef-PDF_XREF_BUF) : PDF_XREF_BUF) );
    m_nXRefLinearizedOffset = m_device.Device()->Tell();

    if( m_device.Device()->Read( m_buffer.GetBuffer(), PDF_XREF_BUF ) != PDF_XREF_BUF )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidLinearization );
    }

    m_buffer.GetBuffer()[PDF_XREF_BUF] = '\0';

    // search backwards in the buffer in case the buffer contains null bytes
    // because it is right after a stream (can't use strstr for this reason)
    const int XREF_LEN    = 4; // strlen( "xref" );
    int       i          = 0;
    char*     pszStart   = NULL;
    for( i = PDF_XREF_BUF - XREF_LEN; i >= 0; i-- )
        if( strncmp( m_buffer.GetBuffer()+i, "xref", XREF_LEN ) == 0 )
        {
            pszStart = m_buffer.GetBuffer()+i;
            break;
        }

    m_nXRefLinearizedOffset += i;
    
    if( !pszStart )
    {
        if( m_ePdfVersion < ePdfVersion_1_5 )
        {
            PdfError::LogMessage( eLogSeverity_Warning, 
                                  "Linearization dictionaries are only supported with PDF version 1.5. This is 1.%i. Trying to continue.\n", 
                                  static_cast<int>(m_ePdfVersion) );
            // PODOFO_RAISE_ERROR( ePdfError_InvalidLinearization );
        }

        {
            m_nXRefLinearizedOffset = static_cast<pdf_long>(lXRef);
            /*
            eCode = ReadXRefStreamContents();
            i     = 0;
            */
        }
    }
}

Here is the call graph for this function:


Generated by  Doxygen 1.6.0   Back to index