# Stamp a page number onto every page of a PDF file.
#
# Usage: perl <this script> [input.pdf [output.pdf]]
#   With no arguments the historical file names are used
#   (bachorgan.pdf -> bachorgan_numbered.pdf).
#
# How it works
#   Pass 1 scans the input for "N 0 obj" lines to learn the highest object
#   number in use.
#   Pass 2 copies the file line by line and
#     * turns every "/Contents N 0 R" line into a two-element array whose
#       second element is a new content stream that draws the page number,
#     * adds a /Font entry (F1 = Helvetica) to the one /Resources layout
#       this script knows about,
#     * appends the font object and the page-number streams just before
#       the cross-reference table,
#     * regenerates the xref table, the trailer /Size and the startxref
#       offset.
#
# Limitations (all inherited from the line-oriented approach)
#   * Patterns are anchored to whole lines and assume LF line endings.
#   * Only objects with generation number 0 and a single xref section are
#     handled.
#   * Pages are numbered in the order their /Contents lines appear in the
#     file, which is assumed to be the page order.
use strict;
use warnings;

my ($inputFileName, $outputFileName) = @ARGV;
$inputFileName  = "bachorgan.pdf"          unless defined $inputFileName;
$outputFileName = "bachorgan_numbered.pdf" unless defined $outputFileName;

# ---------------------------------------------------------------------------
# First pass: find the highest object number used in the input.
# ---------------------------------------------------------------------------
my $highestObjNum = -1;

open(my $in, '<', $inputFileName)
    or die "Cannot open $inputFileName for reading: $!";
binmode($in);
while (my $line = <$in>) {
    if ($line =~ /^([0-9]+) 0 obj$/) {
        $highestObjNum = $1 if $1 > $highestObjNum;
    }
}
close($in);

die "No 'N 0 obj' lines found in $inputFileName; nothing to do\n"
    if $highestObjNum < 0;

# ---------------------------------------------------------------------------
# Second pass: copy the file, patching it as we go.
# ---------------------------------------------------------------------------
open($in, '<', $inputFileName)
    or die "Cannot open $inputFileName for reading: $!";
binmode($in);
open(my $out, '>', $outputFileName)
    or die "Cannot open $outputFileName for writing: $!";
# Both handles must be binary: the xref table stores byte offsets, so no
# line-ending translation may happen between what we count and what we write.
binmode($out);

my %offsetByObj;                       # object number -> byte offset in output

# The first new object is the shared Helvetica font; every object number
# allocated after it belongs to a page-number content stream.
my $fontObjNum = $highestObjNum + 1;
$highestObjNum++;
my $firstPageNumObjNum = $highestObjNum + 1;

my $inXref      = 0;                   # skipping the body of the old xref table
my $inTrailer   = 0;                   # between "trailer" and "startxref"
my $inStartxref = 0;                   # next line is the old startxref offset
my $offset      = 0;                   # bytes written to the output so far
my $xrefOffset  = -1;                  # where the new xref table starts
my $numObjects;                        # entry count of the new xref table

while (my $line = <$in>) {
    if ($inStartxref) {
        # The line after "startxref" holds the old table offset; replace it.
        print {$out} "$xrefOffset\n";
        $inStartxref = 0;
    }
    elsif ($inXref) {
        # Drop the old xref entries; resume copying at the trailer.
        if ($line =~ /^trailer$/) {
            $inXref    = 0;
            $inTrailer = 1;
            print {$out} $line;
        }
    }
    else {
        if ($line =~ /^([0-9]+) 0 obj$/) {
            $offsetByObj{$1} = $offset;
        }

        if ($line =~ /^\/Contents ([0-9]+) 0 R$/) {
            # Reserve a new object for this page's number stream and chain it
            # after the page's original contents.
            my $contentsId = $1;
            $highestObjNum++;
            $line = "/Contents [$contentsId 0 R $highestObjNum 0 R]\n";
        }

        # This is the /Resources /ProcSet layout that appears on the music
        # pages of the actual file. Other layouts exist, but only on the
        # table-of-contents pages, which define their own fonts and are left
        # alone. The net effect is that Acrobat warns it could not find a
        # font on those pages and substitutes Helvetica -- which is what was
        # wanted anyway. The warning is a bit scary, but it seems to work.
        #
        # NOTE(review): the replacement text was damaged in the stored copy
        # of this script (it read "/Resources<>"). It is reconstructed here
        # as the original entry plus a /Font dictionary naming F1; the outer
        # dictionary is assumed to be closed on a following input line, as
        # the match pattern implies. Confirm against a known-good copy.
        if ($line =~ /^\/Resources<<\/ProcSet\[\/PDF \/ImageB\]$/) {
            $line = "/Resources<</ProcSet[/PDF /ImageB]/Font<</F1 $fontObjNum 0 R>>\n";
        }

        if ($inTrailer) {
            # /Size must be one greater than the highest object number, and
            # we have just added objects.
            $line =~ s{/Size\s+[0-9]+}{/Size $numObjects};
        }

        if ($line =~ /^xref$/) {
            # Emit the font resource object.
            $offsetByObj{$fontObjNum} = $offset;
            my $fontObj = "$fontObjNum 0 obj\n<<\n/Type /Font\n/Subtype /Type1\n"
                        . "/Name /F1\n/BaseFont /Helvetica\n"
                        . "/Encoding /WinAnsiEncoding\n>>\nendobj\n";
            $offset += length($fontObj);
            print {$out} $fontObj;

            # Emit one content stream per page; each draws the page number
            # twice, at x=432 and x=108 on the baseline y=9.
            my $pageNum = 1;
            for my $objNum ($firstPageNumObjNum .. $highestObjNum) {
                $offsetByObj{$objNum} = $offset;
                my $stream = "BT\n/F1 12 Tf\n432 9 Td($pageNum)Tj\nET\n"
                           . "BT\n/F1 12 Tf\n108 9 Td($pageNum)Tj\nET\n";
                my $streamLen = length($stream);
                my $obj = "$objNum 0 obj\n<< /Length $streamLen >>\nstream\n"
                        . $stream . "endstream\nendobj\n";
                $offset += length($obj);
                print {$out} $obj;
                $pageNum++;
            }

            # Now write the new cross-reference table. Every entry is exactly
            # 20 bytes, hence the trailing space before the newline.
            $xrefOffset = $offset;
            print {$out} $line;
            $numObjects = $highestObjNum + 1;
            print {$out} "0 $numObjects\n";
            print {$out} "0000000000 65535 f \n";
            for my $objNum (1 .. $highestObjNum) {
                if (defined $offsetByObj{$objNum}) {
                    printf {$out} "%010d 00000 n \n", $offsetByObj{$objNum};
                }
                else {
                    # Object number never defined in the file: mark it free
                    # rather than claiming it lives at offset 0.
                    print {$out} "0000000000 65535 f \n";
                }
            }
            $inXref = 1;
        }

        if ($line =~ /^startxref$/) {
            $inStartxref = 1;
            $inTrailer   = 0;
        }

        # The "xref" keyword itself was already written above.
        print {$out} $line unless $inXref;
    }

    # Count the (possibly rewritten) line so offsets track the output file.
    $offset += length($line);
}

close($in);
close($out) or die "Error writing $outputFileName: $!";

warn "No xref table found in $inputFileName; $outputFileName is not a usable PDF\n"
    if $xrefOffset < 0;