% Copyright (C) 1994-2006 Artifex Software, Inc. All rights reserved.
%
% This software is provided AS-IS with no warranty, either express or
% implied.
%
% This software is distributed under license and may not be copied,
% modified or distributed except as expressly authorized under the terms
% of the license contained in the file LICENSE in this distribution.
%
% For more information about licensing, please refer to
% http://www.ghostscript.com/licensing/. For information on
% commercial licensing, go to http://www.artifex.com/licensing/ or
% contact Artifex Software, Inc., 101 Lucas Valley Road #110,
% San Rafael, CA 94903, U.S.A., +1(415)492-9861.
% $Id: pdf_base.ps 9821 2009-06-26 12:42:29Z alexcher $
% pdf_base.ps
% Basic parser for PDF reader.
% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% streams, and name/number trees; it doesn't include any facilities for
% making marks on the page.
/.setlanguagelevel where { pop 2 .setlanguagelevel } if
.currentglobal true .setglobal
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin
% Define the name interpretation dictionary for reading values.
/valueopdict mark
(<<) cvn { mark } bind % don't push an actual mark!
(>>) cvn { { .dicttomark } stopped {
( **** File has an unbalanced >> \(close dictionary\).\n)
pdfformaterror
} if
} bind
([) cvn { mark } bind % ditto
(]) cvn dup load
% /true true % see .pdfexectoken below
% /false false % ibid.
% /null null % ibid.
/F dup cvx % see Objects section below
/R dup cvx % see Objects section below
/stream dup cvx % see Streams section below
.dicttomark readonly def
% ------ Utilities ------ %
% Define a scratch string. The PDF language definition says that
% no line in a PDF file can exceed 255 characters, but this string
% is also used to search for %PDF-, which needs 1024 characters.
/pdfstring 1024 string def
% Read the previous line of a file. If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
/prevline % - prevline
{ PDFfile fileposition dup () pdfstring
2 index 257 sub 0 .max PDFfile exch setfileposition
{ % Stack: initpos linepos line string
PDFfile fileposition
PDFfile 2 index readline pop
dup length 0 gt
{ 3 2 roll 5 -2 roll pop pop 2 index }
{ pop }
ifelse
% Stack: initpos linepos line string startpos
PDFfile fileposition 5 index ge { exit } if
pop
}
loop pop pop 3 -1 roll pop
} bind def
% Handle the PDF 1.2 #nn escape convention when reading from a file.
% This should eventually be done in C.
/.pdffixname { % .pdffixname
PDFversion 1.2 ge {
dup .namestring (#) search {
name#escape cvn exch pop
} {
pop
} ifelse
} if
} bind def
/name#escape % <(#)>
name#escape
{ exch pop
1 index 2 () /SubFileDecode filter dup (x) readhexstring
% Stack: post pre stream char t/f
not { % tolerate, but complain about bad syntax
pop closefile (#) concatstrings exch
( **** Warning: Invalid hex following '#' name escape, using literal '#' in name.\n)
pdfformaterror
} {
exch closefile concatstrings
exch 2 1 index length 2 sub getinterval
} ifelse
(#) search { name#escape } if concatstrings
} bind def
% Execute a file, interpreting its executable names in a given
% dictionary. The name procedures may do whatever they want
% to the operand stack.
/.pdftokenerror { % .pdftokenerror -
BXlevel 0 le {
( **** Unknown operator: ') pdfformaterror
dup =string cvs pdfformaterror
% Attempt a retry scan of the element after changing to PDFScanInvNum
<< /PDFScanInvNum true >> setuserparams
=string cvs
token pop exch pop dup type
dup /integertype eq exch /realtype eq or {
exch pop exch pop
(', processed as number, value: ) pdfformaterror
dup =string cvs pdfformaterror (\n) pdfformaterror
<< /PDFScanInvNum null >> setuserparams % reset to default scanning rules
false % suppress any stack cleanup
} {
% error was non-recoverable with modified scanning rules
('\n) pdfformaterror
true
} ifelse
} {
true
} ifelse
{ % clean up the operand stack if this was non-recoverable
pop pop count exch sub { pop } repeat % pop all the operands
} if
} bind def
/.pdfexectoken { % .pdfexectoken ?
PDFDEBUG {
pdfdict /PDFSTEPcount known not { pdfdict /PDFSTEPcount 1 .forceput } if
PDFSTEP {
pdfdict /PDFtokencount 2 copy .knownget { 1 add } { 1 } ifelse .forceput
PDFSTEPcount 1 gt {
pdfdict /PDFSTEPcount PDFSTEPcount 1 sub .forceput
} {
dup ==only
( step # ) print PDFtokencount =only
( ? ) print flush 1 false .outputpage
(%stdin) (r) file 255 string readline {
token {
exch pop pdfdict /PDFSTEPcount 3 -1 roll .forceput
} {
pdfdict /PDFSTEPcount 1 .forceput
} ifelse % token
} {
pop /PDFSTEP false def % EOF on stdin
} ifelse % readline
} ifelse % PDFSTEPcount > 1
} {
dup ==only () = flush
} ifelse % PDFSTEP
} if % PDFDEBUG
2 copy .knownget {
exch pop exch pop exch pop exec
} {
% Normally, true, false, and null would appear in opdict
% and be treated as "operators". However, there is a
% special fast case in the PostScript interpreter for names
% that are defined in, and only in, systemdict and/or
% userdict: putting these three names in the PDF dictionaries
% destroys this property for them, slowing down their
% interpretation in all PostScript code. Therefore, we
% check for them explicitly here instead.
dup dup dup /true eq exch /false eq or exch /null eq or {
exch pop exch pop //systemdict exch get
} {
% Hackish fix to detect missing whitespace after "endobj". Yet another
% problem that (you guessed it!) Adobe Acrobat ignores silently
256 string cvs (endobj) anchorsearch {
( **** Missing whitespace after 'endobj'.\n) pdfformaterror
exch pop cvn get exch pop exec
} {
.pdftokenerror
} ifelse
} ifelse
} ifelse
} bind def
/PDFScanRules_true << /PDFScanRules true >> def
/PDFScanRules_null << /PDFScanRules null >> def
/.pdfrun { % .pdfrun -
% Construct a procedure with the stack depth, file and opdict
% bound into it.
1 index cvlit count 2 sub 3 1 roll mark
/PDFScanRules .getuserparam //null eq {
//PDFScanRules_true { setuserparams } 0 get % force PDF scanning mode
mark 7 4 roll
} {
mark 5 2 roll
} ifelse
{ % Stack: ..operands.. count opdict file
{ token } stopped {
dup type /filetype eq { pop } if % pop the operand if it is restored
( **** Error reading a content stream. The page may be incomplete.\n)
pdfformaterror
//false
} if {
dup type /nametype eq {
dup xcheck {
.pdfexectoken
} {
.pdffixname
exch pop exch pop PDFDEBUG {
PDFSTEPcount 1 le {
dup ==only ( ) print flush
} if
} if
} ifelse
} {
exch pop exch pop PDFDEBUG {
PDFSTEPcount 1 le {
dup ==only ( ) print flush
} if
} if
} ifelse
} {
(%%EOF) cvn cvx .pdfexectoken
} ifelse
}
aload pop .packtomark cvx
{ loop } 0 get 2 packedarray cvx
{ stopped } 0 get
/PDFScanRules .getuserparam //null eq {
//PDFScanRules_null { setuserparams } 0 get % reset PDF scannig mode if it was off
} if
/PDFsource PDFsource
{ store { stop } if } aload pop .packtomark cvx
/PDFsource 3 -1 roll store exec
} bind def
% Execute a file, like .pdfrun, for a marking context.
% This temporarily rebinds LocalResources and DefaultQstate.
/.pdfruncontext { % .pdfruncontext -
/.pdfrun load LocalResources DefaultQstate
/LocalResources 7 -1 roll store
/DefaultQstate qstate store
3 .execn
/DefaultQstate exch store
/LocalResources exch store
} bind def
% Get the depth of the PDF operand stack. The caller sets pdfemptycount
% before calling .pdfrun or .pdfruncontext. It is initially set by
% pdf_main, and is also set by any routine which changes the operand
% stack depth (currently .pdfpaintproc, although there are other callers
% of .pdfrun{context} which have not been checked for opstack depth.
/.pdfcount { % - .pdfcount
count pdfemptycount sub
} bind def
% Read a token, but simply return false (no token read) in the case of an
% error. This is messy because 'token' either may or may not pop its operand
% if an error occurs, and because the return values are different depending
% on whether the source is a file or a string. To avoid closing the file
% check for '{' before trying 'token'.
/token_nofail_dict mark
( ) { dup ( ) readstring pop pop } bind
(\t) 1 index
(\r) 1 index
(\n) 1 index
(\000) 1 index
({) { //null //true exit } bind
.dicttomark def
/token_nofail { % token_nofail false
% token_nofail true
% token_nofail true
dup type /filetype eq {
{ dup ( ) .peekstring not { ({) } if
//token_nofail_dict exch .knownget not {
//null 1 index { token } .internalstopped exit
} if
exec
} loop
{ % stack: source null [source]
//null ne { pop } if pop //false
} { % stack: source null ([post] token true | false)
{ 3 1 roll pop pop //true }
{ pop pop //false }
ifelse
} ifelse
} {
//null 1 index % stack: source null source
{ token } .internalstopped { % stack: source null [source]
//null ne { pop } if pop //false
} { % stack: source null ([post] token true | false)
{ 4 2 roll pop pop //true }
{ pop pop //false }
ifelse
} ifelse
} ifelse
} bind def
currentdict /token_nofail_dict .undef
% ================================ Objects ================================ %
% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
/lshift 9 def
/lnshift lshift neg def
/lsubmask 1 lshift bitshift 1 sub def
/lsublen lsubmask 1 add def
/larray { % - larray
[ [] ]
} bind def
/lstring { % - lstring
[ () ]
} bind def
/ltype { % type
0 get type
} bind def
/lget { % lget
dup //lsubmask and 3 1 roll //lnshift bitshift get exch get
} bind def
/lput { % lput -
3 1 roll
dup //lsubmask and 4 1 roll //lnshift bitshift get
3 1 roll put
} bind def
/llength { % llength
dup length 1 sub dup //lshift bitshift
3 1 roll get length add
} bind def
% lgrowto assumes newlength > llength(lseq)
/growto { % growto
1 index type /stringtype eq { string } { array } ifelse
2 copy copy pop exch pop
} bind def
/lgrowto { % lgrowto
dup //lsubmask add //lnshift bitshift dup 3 index length gt {
% Add more sub-arrays. Start by completing the last existing one.
% Stack: lseq newlen newtoplen
3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
% Stack: newlen newtoplen lseq
[ exch aload pop
counttomark 2 add -1 roll % newtoplen
counttomark sub { dup 0 0 getinterval lsublen growto } repeat
dup 0 0 getinterval ] exch
} {
pop
} ifelse
% Expand the last sub-array.
1 sub //lsubmask and 1 add
exch dup dup length 1 sub 2 copy
% Stack: newsublen lseq lseq len-1 lseq len-1
get 5 -1 roll growto put
} bind def
/lforall { % lforall -
/forall cvx 2 packedarray cvx forall
} bind def
% We keep track of PDF objects using the following PostScript variables:
%
% Generations (lstring): Generations[N] holds 1+ the current
% generation number for object number N. (As far as we can tell,
% this is needed only for error checking.) For free objects,
% Generations[N] is 0.
%
% Objects (larray): If object N is loaded, Objects[N] is the actual
% object; otherwise, Objects[N] is an executable integer giving
% the file offset of the object's location in the file. If
% ObjectStream[N] is non-zero then Objects[N] contains the index
% into the object stream instead of the file offset of the object.
%
% ObjectStream (larray): If object N is in an object stream then
% ObjectStream[N] holds the object number of the object stream.
% Otherwise ObjectStream[N] contains 0. If ObjectStream[N]
% is non-zero then Objects[N] contains the index into the object
% stream.
%
% GlobalObjects (dictionary): If object N has been resolved in
% global VM, GlobalObjects[N] is the same as Objects[N]
% (except that GlobalObjects itself is stored in global VM,
% so the entry will not be deleted at the end of the page).
%
% IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
% global VM. This is an accelerator to avoid having to do a
% dictionary lookup in GlobalObjects when resolving every object.
% Initialize the PDF object tables.
/initPDFobjects { % - initPDFobjects -
/ObjectStream larray def
/Objects larray def
/Generations lstring def
.currentglobal true .setglobal
/GlobalObjects 20 dict def
.setglobal
/IsGlobal lstring def
} bind def
% Grow the tables to a specified size.
/growPDFobjects { % growPDFobjects -
dup ObjectStream llength gt {
dup ObjectStream exch lgrowto /ObjectStream exch def
} if
dup Objects llength gt {
dup Objects exch lgrowto /Objects exch def
} if
dup Generations llength gt {
dup Generations exch lgrowto /Generations exch def
} if
dup IsGlobal llength gt {
dup IsGlobal exch lgrowto /IsGlobal exch def
} if
pop
} bind def
% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}. This is not a possible PDF object, because PDF has
% no way to represent procedures. Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
/resolved? { %