SegmentToDelta.php
14.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Pdf
* @subpackage Fonts
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: SegmentToDelta.php 24593 2012-01-05 20:35:02Z matthew $
*/
/** Zend_Pdf_Cmap */
require_once 'Zend/Pdf/Cmap.php';
/**
* Implements the "segment mapping to delta values" character map (type 4).
*
* This is the Microsoft standard mapping table type for OpenType fonts. It
* provides the ability to cover multiple contiguous ranges of the Unicode
* character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
*
* @package Zend_Pdf
* @subpackage Fonts
* @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
{
    /**** Instance Variables ****/

    /**
     * The number of segments in the table.
     * @var integer
     */
    protected $_segmentCount = 0;

    /**
     * The size of the binary search range for segments, as recorded in the
     * font (already divided by two). Parsed for completeness; lookups do not
     * depend on it because the recorded value cannot be trusted to agree
     * with the segment count.
     * @var integer
     */
    protected $_searchRange = 0;

    /**
     * The number of binary search steps recorded in the font, plus one.
     * Parsed for completeness; lookups do not depend on it.
     * @var integer
     */
    protected $_searchIterations = 0;

    /**
     * Array of ending character codes for each segment. Keys run from 1 to
     * the segment count.
     * @var array
     */
    protected $_segmentTableEndCodes = array();

    /**
     * The ending character code for the segment at the end of the low search
     * range, or 0 when the recorded search range does not name a segment.
     * @var integer
     */
    protected $_searchRangeEndCode = 0;

    /**
     * Array of starting character codes for each segment. Keys run from 1 to
     * the segment count.
     * @var array
     */
    protected $_segmentTableStartCodes = array();

    /**
     * Array of (signed) character code to glyph delta values for each
     * segment. Keys run from 1 to the segment count.
     * @var array
     */
    protected $_segmentTableIdDeltas = array();

    /**
     * Array of offsets into the glyph index array for each segment, already
     * converted from bytes to 16-bit words. Keys run from 1 to the segment
     * count.
     * @var array
     */
    protected $_segmentTableIdRangeOffsets = array();

    /**
     * Glyph index array. Stores glyph numbers, used with range offset.
     * Keys start at 0.
     * @var array
     */
    protected $_glyphIndexArray = array();


    /**** Public Interface ****/

    /* Concrete Class Implementation */

    /**
     * Returns an array of glyph numbers corresponding to the Unicode characters.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted. Array keys of the input are
     * preserved in the result.
     *
     * See also {@link glyphNumberForCharacter()}.
     *
     * @param array $characterCodes Array of Unicode character codes (code points).
     * @return array Array of glyph numbers.
     */
    public function glyphNumbersForCharacters($characterCodes)
    {
        $glyphNumbers = array();
        foreach ($characterCodes as $key => $characterCode) {
            $glyphNumbers[$key] = $this->_lookupGlyphNumber($characterCode);
        }
        return $glyphNumbers;
    }

    /**
     * Returns the glyph number corresponding to the Unicode character.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted.
     *
     * See also {@link glyphNumbersForCharacters()} for bulk operations.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number.
     */
    public function glyphNumberForCharacter($characterCode)
    {
        return $this->_lookupGlyphNumber($characterCode);
    }

    /**
     * Returns an array containing the Unicode characters that have entries in
     * this character map.
     *
     * @return array Unicode character codes.
     */
    public function getCoveredCharacters()
    {
        $characterCodes = array();
        for ($segment = 1; $segment <= $this->_segmentCount; $segment++) {
            $endCode = $this->_segmentTableEndCodes[$segment];
            for ($code = $this->_segmentTableStartCodes[$segment]; $code <= $endCode; $code++) {
                $characterCodes[] = $code;
            }
        }
        return $characterCodes;
    }

    /**
     * Returns an array containing the glyphs numbers that have entries in this character map.
     * Keys are Unicode character codes (integers)
     *
     * This functionality is partially covered by glyphNumbersForCharacters(getCoveredCharacters())
     * call, but this method does it in a more effective way (it walks every segment directly
     * instead of searching for the segment of each character code).
     *
     * @internal
     * @return array Array representing <Unicode character code> => <glyph number> pairs.
     */
    public function getCoveredCharactersGlyphs()
    {
        $glyphNumbers = array();
        for ($segment = 1; $segment <= $this->_segmentCount; $segment++) {
            $endCode = $this->_segmentTableEndCodes[$segment];
            for ($code = $this->_segmentTableStartCodes[$segment]; $code <= $endCode; $code++) {
                $glyphNumbers[$code] = $this->_glyphNumberInSegment($segment, $code);
            }
        }
        return $glyphNumbers;
    }


    /* Object Lifecycle */

    /**
     * Object constructor
     *
     * Parses the raw binary table data. Throws an exception if the table is
     * malformed.
     *
     * @param string $cmapData Raw binary cmap table data.
     * @throws Zend_Pdf_Exception
     */
    public function __construct($cmapData)
    {
        /* Sanity check: The table should be at least 23 bytes in size.
         */
        $actualLength = strlen($cmapData);
        if ($actualLength < 23) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Insufficient table data',
                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
        }

        /* Sanity check: Make sure this is right data for this table type.
         */
        $type = $this->_extractUInt2($cmapData, 0);
        if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Wrong cmap table type',
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
        }

        $length = $this->_extractUInt2($cmapData, 2);
        if ($length != $actualLength) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
        }

        /* Mapping tables should be language-independent. The font may not work
         * as expected if they are not. Unfortunately, many font files in the
         * wild incorrectly record a language ID in the field at offset 4, so
         * that field is deliberately not read or validated here.
         */

        /* These two values are stored premultiplied by two which is convenient
         * when using the binary data directly, but we're parsing it out to
         * native PHP data types, so divide by two.
         */
        $this->_segmentCount     = $this->_extractUInt2($cmapData, 6) >> 1;
        $this->_searchRange      = $this->_extractUInt2($cmapData, 8) >> 1;
        $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;

        /* Sanity check: the 14-byte header, the four per-segment arrays (two
         * bytes per entry each) and the two reserved bytes must all fit inside
         * the data before we start reading them.
         */
        if ($actualLength < (16 + (8 * $this->_segmentCount))) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Insufficient table data',
                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
        }

        $offset = 14;
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        /* The recorded search range may not name an existing segment (zero
         * segments, or an inconsistent header), so only dereference it when
         * it does.
         */
        if (isset($this->_segmentTableEndCodes[$this->_searchRange])) {
            $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
        }

        $offset += 2;    // reserved bytes

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
        }

        /* The range offset helps determine the index into the glyph index array.
         * Like the segment count and search range above, it's stored as a byte
         * multiple in the font, so divide by two as we extract the values.
         */
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
        }

        /* The size of the glyph index array varies by font and depends on the
         * extent of the usage of range offsets versus deltas. Some fonts may
         * not have any entries in this array. Only whole two-byte entries are
         * read; a dangling odd byte is reported by the check below.
         */
        for (; ($offset + 1) < $length; $offset += 2) {
            $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
        }

        /* Sanity check: After reading all of the data, we should be at the end
         * of the table.
         */
        if ($offset != $length) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
                                         Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
        }
    }


    /**** Internal Methods ****/

    /**
     * Maps a single character code to its glyph number.
     *
     * Shared implementation behind {@link glyphNumberForCharacter()} and
     * {@link glyphNumbersForCharacters()}.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number, or the missing character glyph when the
     *   code is not covered by any segment.
     */
    protected function _lookupGlyphNumber($characterCode)
    {
        /* These tables only cover the 16-bit character range.
         */
        if (($characterCode < 0) || ($characterCode > 0xffff)) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        $segment = $this->_findSegmentIndex($characterCode);

        /* No candidate segment at all, or the candidate starts after our
         * character code: the character is not represented in this font.
         */
        if (($segment === null) ||
            ($this->_segmentTableStartCodes[$segment] > $characterCode)) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        return $this->_glyphNumberInSegment($segment, $characterCode);
    }

    /**
     * Finds the first segment whose end code is greater than or equal to the
     * given character code.
     *
     * The segments are ordered from lowest-to-highest end code, so a plain
     * binary search over the segment numbers is sufficient. The search
     * parameters recorded in the font are intentionally not used: when they
     * disagree with the actual segment count they would steer the search
     * outside of the end code array.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer|null Segment number (1-based), or null when every
     *   segment ends below the character code.
     */
    protected function _findSegmentIndex($characterCode)
    {
        $found = null;
        $low   = 1;
        $high  = $this->_segmentCount;

        while ($low <= $high) {
            $middle = ($low + $high) >> 1;
            if ($this->_segmentTableEndCodes[$middle] >= $characterCode) {
                // Candidate found; keep looking for an earlier one.
                $found = $middle;
                $high  = $middle - 1;
            } else {
                $low = $middle + 1;
            }
        }

        return $found;
    }

    /**
     * Computes the glyph number for a character code that is known to lie
     * inside the given segment.
     *
     * @param integer $segment Segment number (1-based).
     * @param integer $characterCode Character code within the segment.
     * @return integer Glyph number.
     */
    protected function _glyphNumberInSegment($segment, $characterCode)
    {
        $idDelta       = $this->_segmentTableIdDeltas[$segment];
        $idRangeOffset = $this->_segmentTableIdRangeOffsets[$segment];

        if ($idRangeOffset == 0) {
            /* This segment uses a simple mapping from character code to
             * glyph number, taken modulo 65536. A bit mask is used rather
             * than the % operator because the delta is signed and PHP's %
             * keeps the sign of a negative sum.
             */
            return ($characterCode + $idDelta) & 0xffff;
        }

        /* This segment relies on the glyph index array to determine the
         * glyph number. The calculation below determines the correct
         * index into that array. It's a little odd because the range
         * offset in the font file is designed to quickly provide an
         * address of the index in the raw binary data instead of the
         * index itself. Since we've parsed the data into arrays, we
         * must process it a bit differently.
         */
        $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$segment] +
                       $idRangeOffset - $this->_segmentCount +
                       $segment - 1);

        /* A range offset pointing outside of the parsed glyph index array
         * cannot be resolved; treat the character as missing.
         */
        if (!isset($this->_glyphIndexArray[$glyphIndex])) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        /* A stored value of zero is returned untouched. Any other value has
         * the segment's delta added to it, again modulo 65536, as the cmap
         * format 4 specification requires.
         */
        $glyphNumber = $this->_glyphIndexArray[$glyphIndex];
        if ($glyphNumber != 0) {
            $glyphNumber = ($glyphNumber + $idDelta) & 0xffff;
        }

        return $glyphNumber;
    }
}