Intl.php
5.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
<?php // vi: set fenc=utf-8 ts=4 sw=4 et:
/*
* Copyright (C) 2013 Nicolas Grekas - p@tchwork.com
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the (at your option):
* Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
* GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
*/
namespace Patchwork\PHP\Shim;
/**
* Partial intl implementation in pure PHP.
*
* Implemented:
* - grapheme_extract - Extract a sequence of grapheme clusters from a text buffer, which must be encoded in UTF-8
* - grapheme_stripos - Find position (in grapheme units) of first occurrence of a case-insensitive string
* - grapheme_stristr - Returns part of haystack string from the first occurrence of case-insensitive needle to the end of haystack
* - grapheme_strlen - Get string length in grapheme units
* - grapheme_strpos - Find position (in grapheme units) of first occurrence of a string
* - grapheme_strripos - Find position (in grapheme units) of last occurrence of a case-insensitive string
* - grapheme_strrpos - Find position (in grapheme units) of last occurrence of a string
* - grapheme_strstr - Returns part of haystack string from the first occurrence of needle to the end of haystack
* - grapheme_substr - Return part of a string
*/
class Intl
{
    /**
     * Extracts up to $size grapheme clusters from $s, starting at byte offset $start.
     *
     * Only GRAPHEME_EXTR_COUNT is implemented; the two other extraction types
     * raise an E_USER_WARNING and return false.
     *
     * @param string $s     UTF-8 text buffer.
     * @param int    $size  Maximum number of grapheme clusters to extract.
     * @param int    $type  One of the GRAPHEME_EXTR_* constants (0..2).
     * @param int    $start Byte offset in $s where extraction starts.
     * @param int    $next  Receives the byte offset following the extracted clusters.
     *
     * @return string|false The extracted clusters, '' when $size is 0 or nothing
     *                      matches, false on invalid arguments or PCRE failure.
     */
    public static function grapheme_extract($s, $size, $type = GRAPHEME_EXTR_COUNT, $start = 0, &$next = 0)
    {
        if (is_array($s)) return !user_error(__METHOD__ . '() expects parameter 1 to be string, array given', E_USER_WARNING);

        $s = (string) $s;
        $size = (int) $size;
        $type = (int) $type;
        $start = (int) $start;

        if ('' === $s || 0 > $size || 0 > $start || 0 > $type || 2 < $type) return false;
        if (0 === $size) return '';

        $next = $start;

        if (GRAPHEME_EXTR_COUNT !== $type)
        {
            //TODO
            return !user_error(__METHOD__ . '() with GRAPHEME_EXTR_MAXBYTES or GRAPHEME_EXTR_MAXCHARS is not implemented', E_USER_WARNING);
        }

        $s = substr($s, $start); //TODO: seek to the first character boundary when needed

        // Split on single clusters and let preg_split()'s limit bound how many
        // are captured. A bounded quantifier ("{1,$size}") cannot be used here:
        // PCRE rejects repeat counts above 65535, and emulating bigger counts
        // with nested fixed repetitions fails on strings shorter than the
        // requested size.
        $limit = $size < PHP_INT_MAX ? $size + 1 : -1;
        $parts = preg_split('/(' . GRAPHEME_CLUSTER_RX . ')/u', $s, $limit, PREG_SPLIT_DELIM_CAPTURE);

        if (false === $parts) return false; // PCRE failure, e.g. $s is not valid UTF-8 at $start

        // $parts alternates unmatched text (even indexes) and clusters (odd indexes).
        $next += strlen($parts[0]);
        $clusters = '';

        for ($i = 1; isset($parts[$i]); $i += 2)
        {
            $clusters .= $parts[$i];

            // Only contiguous clusters belong to the result: stop at the first gap.
            if (!isset($parts[$i + 1]) || '' !== $parts[$i + 1]) break;
        }

        $next += strlen($clusters);

        return $clusters;
    }

    /**
     * Counts the grapheme clusters of $s.
     *
     * @param string $s UTF-8 string.
     *
     * @return int Number of GRAPHEME_CLUSTER_RX matches found in $s.
     */
    public static function grapheme_strlen($s)
    {
        // The replacement result is discarded: only the match count matters.
        preg_replace('/' . GRAPHEME_CLUSTER_RX . '/u', '', $s, -1, $count);

        return $count;
    }

    /**
     * Returns the part of $s made of $len grapheme clusters starting at cluster $start.
     *
     * @param string $s     UTF-8 string.
     * @param int    $start Cluster offset; negative values count from the end.
     * @param int    $len   Cluster count; negative values stop that many clusters before the end.
     *
     * @return string|false The selected clusters, or false when $start or $len
     *                      fall outside of the string.
     */
    public static function grapheme_substr($s, $start, $len = 2147483647)
    {
        preg_match_all('/' . GRAPHEME_CLUSTER_RX . '/u', $s, $matches);

        $clusters = $matches[0];
        $slen = count($clusters);
        $start = (int) $start;

        if (0 > $start) $start += $slen;
        if (0 > $start) return false;
        if ($start >= $slen) return false;

        $rem = $slen - $start; // clusters available from $start to the end

        if (0 > $len) $len += $rem;
        if (0 === $len) return '';
        if (0 > $len) return false;
        if ($len > $rem) $len = $rem;

        return implode('', array_slice($clusters, $start, $len));
    }

    /**
     * Normalizes $start and $len before delegating to grapheme_substr().
     *
     * Note that grapheme_substr() and grapheme_strlen() are called unqualified
     * here, not through self:: - presumably so that the native intl functions
     * are used; confirm against the code that registers this workaround.
     *
     * @param string $s     UTF-8 string.
     * @param int    $start Cluster offset; negative values count from the end.
     * @param int    $len   Cluster count; negative values stop that many clusters before the end.
     *
     * @return string|false
     */
    public static function grapheme_substr_workaround62759($s, $start, $len)
    {
        // Intl based http://bugs.php.net/62759 and 55562 workaround

        if (2147483647 == $len) return grapheme_substr($s, $start);

        $slen = grapheme_strlen($s);
        $start = (int) $start;

        if (0 > $start) $start += $slen;
        if (0 > $start) return false;
        if ($start >= $slen) return false;

        $rem = $slen - $start; // clusters available from $start to the end

        if (0 > $len) $len += $rem;
        if (0 === $len) return '';
        if (0 > $len) return false;
        if ($len > $rem) $len = $rem;

        return grapheme_substr($s, $start, $len);
    }

    /** Position (in grapheme units) of the first occurrence of $needle in $s. */
    public static function grapheme_strpos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 0);
    }

    /** Case-insensitive variant of grapheme_strpos(). */
    public static function grapheme_stripos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 1);
    }

    /** Position (in grapheme units) of the last occurrence of $needle in $s. */
    public static function grapheme_strrpos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 2);
    }

    /** Case-insensitive variant of grapheme_strrpos(). */
    public static function grapheme_strripos($s, $needle, $offset = 0)
    {
        return self::grapheme_position($s, $needle, $offset, 3);
    }

    /** Delegates to mb_stristr() with the UTF-8 encoding. */
    public static function grapheme_stristr($s, $needle, $before_needle = false)
    {
        return mb_stristr($s, $needle, $before_needle, 'UTF-8');
    }

    /** Delegates to mb_strstr() with the UTF-8 encoding. */
    public static function grapheme_strstr($s, $needle, $before_needle = false)
    {
        return mb_strstr($s, $needle, $before_needle, 'UTF-8');
    }

    /**
     * Shared implementation of the grapheme_str*pos() family.
     *
     * @param string $s      Haystack.
     * @param string $needle Searched string.
     * @param int    $offset Number of leading grapheme clusters to skip; negative values are treated as 0.
     * @param int    $mode   0: strpos, 1: stripos, 2: strrpos, anything else: strripos.
     *
     * @return int|false Position in grapheme units relative to the start of the
     *                   original haystack, or false when there is no match.
     */
    protected static function grapheme_position($s, $needle, $offset, $mode)
    {
        $offset = (int) $offset;

        if ($offset > 0) $s = (string) self::grapheme_substr($s, $offset);
        else if ($offset < 0) $offset = 0;

        if ('' === (string) $needle) return false;
        if ('' === (string) $s) return false;

        switch ($mode)
        {
            case 0: $pos = iconv_strpos($s, $needle, 0, 'UTF-8'); break;
            case 1: $pos = mb_stripos($s, $needle, 0, 'UTF-8'); break;
            case 2: $pos = iconv_strrpos($s, $needle, 'UTF-8'); break;
            default: $pos = mb_strripos($s, $needle, 0, 'UTF-8'); break;
        }

        // A match at index 0 is a valid result: test the type, not the truthiness,
        // otherwise the skipped $offset clusters are lost for such matches.
        if (!is_int($pos)) return $pos;

        // Convert the code point index into a grapheme cluster index.
        if (0 < $pos) $pos = self::grapheme_strlen(iconv_substr($s, 0, $pos, 'UTF-8'));

        return $pos + $offset;
    }
}