% Source: repository web viewer, blob 8e65d9e027924bd118735e4dd5c5451f443a6446 (plain); navigation text and line-number gutter removed.
% Copyright (C) 2001-2021 Artifex Software, Inc.
% All Rights Reserved.
%
% This software is provided AS-IS with no warranty, either express or
% implied.
%
% This software is distributed under license and may not be copied,
% modified or distributed except as expressly authorized under the terms
% of the license contained in the file LICENSE in this distribution.
%
% Refer to licensing information at http://www.artifex.com or contact
% Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
% CA 94945, U.S.A., +1(415)492-9861, for further information.
%

% pdf2dsc.ps
% read pdf file and produce DSC "index" file.
%
% Input  file is named PDFname
% Output file is named DSCname
%
% Run using:
%  gs -dNODISPLAY -sPDFname=pdffilename -sDSCname=tempfilename pdf2dsc.ps
% Then display the PDF file with
%  gs tempfilename
%
% Modified by Jason McCarty, bug 688071
%       Add PageLabels support.
% Modified by Geoff Keating <geoffk@ozemail.com.au> 21/12/98:
%	Add DocumentMedia, PageMedia comments
%	Use inherited BoundingBox and Orientation
%	Reformat, add new macro 'puts', generally clean up
% Modified by Johannes Plass <plass@dipmza.physik.uni-mainz.de> 1996-11-05:
%	Adds BoundingBox and Orientation if available.
% Modified by rjl/lpd 9/19/96
%	Updates for compatibility with modified pdf_*.ps code for handling
%	  page ranges (i.e., partial files) better.
% Modified by Geoff Keating <Geoff.Keating@anu.edu.au> 7/3/96:
%	include Title and CreationDate DSC comments (these are displayed by
%	  Ghostview);
%	reduce the size of typical output files by a factor of about 3.
% Modified by L. Peter Deutsch 3/18/96:
%	Removes unnecessary and error-prone code duplicated from pdf_main.ps
% Modified by L. Peter Deutsch for GS 3.33
% Originally by Russell Lang  1995-04-26

% Open the input PDF (named by PDFname) for reading and the DSC index file
% (named by DSCname) for writing.  Both names are expected to be supplied by
% the caller, e.g. with -sPDFname=... -sDSCname=... on the command line.
/PDFfile PDFname (r) file def
/DSCfile DSCname (w) file def
% Call .setsafe only when systemdict defines it.
% NOTE(review): presumably both files are opened first because .setsafe
% restricts file access afterwards -- confirm against the Ghostscript docs.
systemdict /.setsafe known { .setsafe } if

% string  puts  -   -- append the string to DSCfile
/puts { DSCfile exch writestring } bind def
% Shared scratch buffer used as the target of cvs conversions; every
% conversion into it overwrites the previous one.
/DSCstring 255 string def
% Filled by the media scan: one entry per distinct page size, holding a
% [width height] array.
/MediaTypes 10 dict def

   % Hand the PDF to the interpreter, then fall back to the full page range
   % for whichever of FirstPage / LastPage the caller did not define.
   PDFfile runpdfbegin
   /FirstPage where not { /FirstPage 1 def } { pop } ifelse
   /LastPage where not { /LastPage pdfpagecount def } { pop } ifelse

% scan through for media sizes, keep them in the dictionary
%
% For every page in FirstPage..LastPage the MediaBox is reduced to a
% [width height] array, which is stored in MediaTypes under the name
% <width>x<height>.  Pages of equal size therefore share a single entry.
   FirstPage 1 LastPage {
      pdfgetpage /MediaBox pget pop   % MediaBox is a required attribute
      aload pop                       % llx lly urx ury
      3 -1 roll sub 3 1 roll exch sub exch   % width height
      2 array astore
      % Build the dictionary key from the two numbers.  The buffers are 32
      % bytes long because cvs raises rangecheck when the text of a real
      % (sign, fraction digits, exponent) does not fit; 10 bytes was too few.
      aload 3 1 roll 32 string cvs exch 32 string cvs
      (x) 3 -1 roll concatstrings concatstrings cvn
      MediaTypes 3 1 roll exch put
   } for

% write header and prolog
%
% Emits the DSC version line.  When the trailer has an Info dictionary, its
% Title and CreationDate entries (each only if present) follow as %%Title:
% and %%CreationDate: comments, with the value written by write==.
   (%!PS-Adobe-3.0\n) puts
   Trailer /Info knownoget
    {
      % Info keys paired with their DSC comment prefix, in output order
      [ [ /Title (%%Title: ) ] [ /CreationDate (%%CreationDate: ) ] ]
       {
         aload pop exch            % info prefix key
         2 index exch knownoget    % info prefix value true  |  info prefix false
          { exch puts DSCfile exch write== }
          { pop }
         ifelse
       }
      forall
      pop                          % done with the Info dictionary
    }
   if
   % This is really supposed to be sorted by frequency of usage...
   %
   % One line per entry of MediaTypes:
   %   <prefix>y<height>x<width> <width> <height> 70 white ()
   % The first line carries the %%DocumentMedia: prefix and every later one
   % the continuation prefix %%+ ; the pending prefix rides on the stack.
   (%%DocumentMedia: )
   MediaTypes {
      aload pop                  % prefix key width height
      4 -1 roll puts             % key width height   (prefix written)
      3 -1 roll pop              % width height       (key not needed)
      (y) puts dup DSCstring cvs puts
      (x) puts 1 index DSCstring cvs puts
      ( ) puts exch DSCstring cvs puts
      ( ) puts DSCstring cvs puts
      ( 70 white ()\n) puts
      (%%+ )                     % prefix for the next entry
   } forall
   pop                           % drop the unused prefix

   % Page count, then the fixed prolog and setup sections of the DSC file.
   % The setup section reopens the PDF by name (written as a PostScript
   % string literal by write==only) and hands it to runpdfbegin.
   (%%Pages: ) puts
   LastPage FirstPage sub 1 add DSCstring cvs puts
   [
     (\n%%EndComments\n)
     (%%BeginProlog\n)
     (/Page null def\n/Page# 0 def\n/PDFSave null def\n)
     (/DSCPageCount 0 def\n)
     (/DoPDFPage {dup /Page# exch store dup dopdfpages } def\n)
     (%%EndProlog\n)
     (%%BeginSetup\n)
   ] { puts } forall
   DSCfile PDFname write==only
   [
     ( \(r\) file { DELAYSAFER { .setsafe } if } stopped pop\n)
     ( runpdfbegin\n)
     ( process_trailer_attrs\n)
     (%%EndSetup\n)
   ] { puts } forall

   /.hasPageLabels false def % see "Page Labels" in the PDF Reference
   Trailer /Root knownoget {
     /PageLabels knownoget {
       /PageLabels exch def
       /.pageCounter 1 def
       /.pageCounterType /D def
       /.pagePrefix () def

       % (TEXT)  .ToLower  (text)
       % Lowercase a string in place by setting bit 5 (value 32) of every
       % byte; the same string object is returned.  Only meaningful for
       % letters -- other bytes get the bit set as well.
       /.ToLower {
         0 1 2 index length 1 sub { % walk the indices front to back
           2 copy get 32 or % string index byte-with-bit-5-set
           2 index 3 1 roll put % store it back, leaving the string
         } for
       } def

       % int  .CvAlpha  (page label in capital letters)
       % Convert a positive integer to the alphabetic page-label form defined
       % under "Page Labels" in the PDF Reference (style /A): A..Z for 1..26,
       % AA..ZZ for 27..52, AAA..ZZZ for 53..78, and so on -- a single letter
       % repeated.  This is not a base-26 number: 28 is BB, not AB.
       % The caller must pass a value >= 1.
       /.CvAlpha {
         1 sub % zero-based index
         dup 26 idiv 1 add string % result string; its length is the repeat count
         exch 26 mod 65 add % ASCII code of the letter (65 is A)
         0 1 3 index length 1 sub { % fill every position with that letter
           2 index exch 2 index put
         } for
         pop % drop the letter code, leaving the string
       } def

       % int  .CvRoman  (int in capital Roman numerals)
       % Convert a positive integer below 4000 to capital Roman numerals.
       % For 4000 and above the decimal text is returned instead; that text
       % is a substring of the shared DSCstring buffer.
       /.CvRoman {
         dup DSCstring cvs exch % decimal text underneath, the number on top
         4000 lt { % only values below 4000 are translated
           [ [ () (I) (II) (III) (IV) (V) (VI) (VII) (VIII) (IX) ]
             [ () (X) (XX) (XXX) (XL) (L) (LX) (LXX) (LXXX) (XC) ]
             [ () (C) (CC) (CCC) (CD) (D) (DC) (DCC) (DCCC) (CM) ]
             [ () (M) (MM) (MMM) ] ] % one row per decimal place, units first
           exch dup length exch % rows place digits  (place = digits left)
           () exch % rows place result digits
           { % rows place result digit-char
             48 sub % ASCII digit -> 0..9
             3 index 3 index 1 sub get % row for the current decimal place
             exch get concatstrings % append the numeral for this digit
             exch 1 sub exch % move on to the next lower place
           } forall
           3 1 roll pop pop % keep only the result
         } if
       } def

       % Dispatch table from a page-label numbering style (the value stored
       % in .pageCounterType) to the procedure  int -> string  that formats
       % a page number in that style.
       /PageToString 5 dict
         dup /D { DSCstring cvs } put
         dup /R { .CvRoman } put
         dup /r { .CvRoman .ToLower } put
         dup /A { .CvAlpha } put
         dup /a { .CvAlpha .ToLower } put
       def
       /.hasPageLabels true def
     } if
   } if

   % process each page
   %
   % For every page in FirstPage..LastPage this writes
   %   %%Page: <label> <page number>
   %   %%PageMedia: y<height>x<width>
   %   %%PageBoundingBox: <CropBox values>   (only when a CropBox is found)
   %   %%PageOrientation: <name>             (only when a Rotate is found)
   %   <page number> DoPDFPage
   % <label> is the PDF page label when the document has a PageLabels tree,
   % otherwise the page number itself.

   % int  .syncPageLabel  -
   % Look up page number <int> (1-based) in the PageLabels number tree, which
   % is keyed by page number minus one.  When a labelling range starts at
   % that page, reload the numbering style, prefix and start value from it.
   % Must only be called when .hasPageLabels is true.
   /.syncPageLabel {
     1 sub PageLabels exch numoget dup null ne {
       % page labels changed at this page, reset the values
       dup /S known { dup /S get } { null } ifelse
       /.pageCounterType exch def

       dup /P known { dup /P get } { () } ifelse
       /.pagePrefix exch def

       dup /St known { /St get } { pop 1 } ifelse
       /.pageCounter exch def
     } { pop } ifelse
   } def

   % Replay the pages that precede FirstPage so that the style, prefix and
   % counter are correct when the output starts in the middle of a labelling
   % range.  Without this, a run with FirstPage > 1 would label its pages
   % from the initial decimal/1 state.
   .hasPageLabels {
     1 1 FirstPage 1 sub {
       .syncPageLabel
       /.pageCounter .pageCounter 1 add def
     } for
   } if

   FirstPage 1 LastPage {
       (%%Page: ) puts

       .hasPageLabels {
         dup .syncPageLabel

         % build the label text: prefix followed by the formatted number
         .pagePrefix
         .pageCounterType //null ne dup {
           PageToString .pageCounterType known and
         } if { % format the page number
           .pageCounter dup 0 gt { % don't try to format nonpositive numbers
             PageToString .pageCounterType get exec
           } {
             DSCstring cvs
           } ifelse
         } { () } ifelse
         concatstrings
         % write==only emits the text as a parenthesised PostScript string
         % literal, so parentheses, backslashes and non-printing bytes in the
         % prefix are escaped instead of breaking the %%Page: line.
         DSCfile exch write==only

         /.pageCounter .pageCounter 1 add def
       } {
         dup DSCstring cvs puts
       } ifelse
       ( ) puts
       dup DSCstring cvs puts
       (\n) puts

       dup pdfgetpage
       dup /MediaBox pget pop
         (%%PageMedia: y) puts
         aload pop 3 -1 roll sub DSCstring cvs puts   % height = ury - lly
         (x) puts exch sub DSCstring cvs puts         % width  = urx - llx
         (\n) puts
       dup /CropBox pget {
         (%%PageBoundingBox: ) puts
         {DSCfile exch write=only ( ) puts} forall
         (\n) puts
       } if
       /Rotate pget {
         (%%PageOrientation: ) puts
         % reduce the angle to a number of quarter turns in 0..3
         90 div cvi 4 mod dup 0 lt {4 add} if
         [(Portrait) (Landscape) (UpsideDown) (Seascape)] exch get puts
         (\n) puts
       } if

       % the page number left on the stack becomes the DoPDFPage argument
       DSCfile exch DSCstring cvs writestring
       ( DoPDFPage\n) puts
    } for
    % Finished reading the PDF in this run.
    runpdfend
% write trailer
% The runpdfend line written here is executed later, by the interpreter that
% runs the generated DSC file.
[ (%%Trailer\n) (runpdfend\n) (%%EOF\n) ] { puts } forall
% close output file and exit
DSCfile closefile
quit
% end of pdf2dsc.ps