1: <?php
2: /**
3: * PHPWord
4: *
5: * Copyright (c) 2013 PHPWord
6: *
7: * This library is free software; you can redistribute it and/or
8: * modify it under the terms of the GNU Lesser General Public
9: * License as published by the Free Software Foundation; either
10: * version 2.1 of the License, or (at your option) any later version.
11: *
12: * This library is distributed in the hope that it will be useful,
13: * but WITHOUT ANY WARRANTY; without even the implied warranty of
14: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15: * Lesser General Public License for more details.
16: *
17: * You should have received a copy of the GNU Lesser General Public
18: * License along with this library; if not, write to the Free Software
19: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20: *
21: * @category PHPWord
22: * @package PHPWord
23: * @copyright Copyright (c) 2013 PHPWord
24: * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
25: * @version 0.7.0
26: */
27:
/**
 * Class PHPWord_Shared_String
 *
 * Static string helpers: OOXML control-character escaping, UTF-8 detection,
 * number formatting, encoding conversion and BIFF8 Unicode string packing.
 */
class PHPWord_Shared_String
{
    /**
     * Control characters lookup table
     *
     * Maps the OOXML escape sequence ("_xHHHH_") to the raw control character.
     * Filled lazily by _buildControlCharacters() on first use.
     *
     * @var string[]
     */
    private static $_controlCharacters = array();

    /**
     * Is mbstring extension available? (unset until first queried)
     *
     * @var boolean
     */
    private static $_isMbstringEnabled;

    /**
     * Is iconv extension available? (unset until first queried)
     *
     * @var boolean
     */
    private static $_isIconvEnabled;

    /**
     * Build control characters array
     *
     * Covers every code point from 0x00 to 0x1F except tab (9), line feed (10)
     * and carriage return (13), which are legal in XML text and must stay as-is.
     * The upper bound is 31 (0x1F): all other characters below 0x20 cannot be
     * represented in XML 1.0 and therefore need the _xHHHH_ escape.
     */
    private static function _buildControlCharacters()
    {
        for ($i = 0; $i <= 31; ++$i) {
            if ($i !== 9 && $i !== 10 && $i !== 13) {
                // Four upper-case hex digits, e.g. 8 => "_x0008_"
                $find = sprintf('_x%04X_', $i);
                self::$_controlCharacters[$find] = chr($i);
            }
        }
    }

    /**
     * Get whether mbstring extension is available
     *
     * The result of the probe is cached in a static property.
     *
     * @return boolean
     */
    public static function getIsMbstringEnabled()
    {
        if (!isset(self::$_isMbstringEnabled)) {
            self::$_isMbstringEnabled = function_exists('mb_convert_encoding');
        }

        return self::$_isMbstringEnabled;
    }

    /**
     * Get whether iconv extension is available
     *
     * The result of the probe is cached in a static property.
     *
     * @return boolean
     */
    public static function getIsIconvEnabled()
    {
        if (!isset(self::$_isIconvEnabled)) {
            self::$_isIconvEnabled = function_exists('iconv');
        }

        return self::$_isIconvEnabled;
    }

    /**
     * Convert from OpenXML escaped control character to PHP control character
     *
     * Excel 2007 team:
     * ----------------
     * That's correct, control characters are stored directly in the shared-strings table.
     * We do encode characters that cannot be represented in XML using the following escape sequence:
     * _xHHHH_ where H represents a hexadecimal character in the character's value...
     * So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
     * element or in the shared string <t> element).
     *
     * @param string $value Value to unescape
     * @return string Value with every known _xHHHH_ sequence replaced by its control character
     */
    public static function ControlCharacterOOXML2PHP($value = '')
    {
        if (empty(self::$_controlCharacters)) {
            self::_buildControlCharacters();
        }

        return str_replace(
            array_keys(self::$_controlCharacters),
            array_values(self::$_controlCharacters),
            $value
        );
    }

    /**
     * Convert from PHP control character to OpenXML escaped control character
     *
     * Inverse of ControlCharacterOOXML2PHP(): each control character in the
     * lookup table is replaced by its _xHHHH_ escape sequence (for example
     * chr(8) becomes "_x0008_"). Tab, line feed and carriage return are left
     * untouched.
     *
     * @param string $value Value to escape
     * @return string
     */
    public static function ControlCharacterPHP2OOXML($value = '')
    {
        if (empty(self::$_controlCharacters)) {
            self::_buildControlCharacters();
        }

        return str_replace(
            array_values(self::$_controlCharacters),
            array_keys(self::$_controlCharacters),
            $value
        );
    }

    /**
     * Check if a string contains UTF-8 data
     *
     * An empty string counts as UTF-8. Otherwise the check relies on the PCRE
     * "u" modifier: the match only succeeds (returns 1) when the subject can be
     * processed as UTF-8.
     *
     * @param string $value
     * @return boolean
     */
    public static function IsUTF8($value = '')
    {
        return $value === '' || preg_match('/^./su', $value) === 1;
    }

    /**
     * Formats a numeric value as a string for output in various output writers
     *
     * Always uses two decimals, "." as decimal point and no thousands separator.
     *
     * @param mixed $value
     * @return string
     */
    public static function FormatNumber($value)
    {
        return number_format($value, 2, '.', '');
    }

    /**
     * Converts a UTF-8 string into BIFF8 Unicode string data (8-bit string length)
     * Writes the string using uncompressed notation, no rich text, no Asian phonetics
     * If neither mbstring nor iconv is available, ASCII is assumed, and compressed notation is used
     * although this will give wrong results for non-ASCII strings
     * see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
     *
     * @param string $value UTF-8 encoded string
     * @return string
     */
    public static function UTF8toBIFF8UnicodeShort($value)
    {
        // 'C' = unsigned char: 8-bit character count
        return self::_packBiff8Unicode($value, 'C');
    }

    /**
     * Converts a UTF-8 string into BIFF8 Unicode string data (16-bit string length)
     * Writes the string using uncompressed notation, no rich text, no Asian phonetics
     * If neither mbstring nor iconv is available, ASCII is assumed, and compressed notation is used
     * although this will give wrong results for non-ASCII strings
     * see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
     *
     * @param string $value UTF-8 encoded string
     * @return string
     */
    public static function UTF8toBIFF8UnicodeLong($value)
    {
        // 'v' = unsigned short, little endian: 16-bit character count
        return self::_packBiff8Unicode($value, 'v');
    }

    /**
     * Shared implementation of the BIFF8 Unicode string writers
     *
     * Emits: character count (packed with $lengthFormat), one option-flag byte,
     * then the character data converted to UTF-16LE.
     *
     * @param string $value        UTF-8 encoded string
     * @param string $lengthFormat pack() format code used for the character count
     * @return string
     */
    private static function _packBiff8Unicode($value, $lengthFormat)
    {
        // character count
        $ln = self::CountCharacters($value, 'UTF-8');

        // option flags: 0x0001 (uncompressed) only when a converter exists
        // that can actually produce UTF-16LE data; otherwise 0x0000
        $canConvert = self::getIsMbstringEnabled() || self::getIsIconvEnabled();
        $opt = $canConvert ? 0x0001 : 0x0000;

        // characters
        $chars = self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');

        return pack($lengthFormat . 'C', $ln, $opt) . $chars;
    }

    /**
     * Convert string from one encoding to another. First try mbstring, then iconv, or no conversion
     *
     * @param string $value
     * @param string $to Encoding to convert to, e.g. 'UTF-8'
     * @param string $from Encoding to convert from, e.g. 'UTF-16LE'
     * @return string Converted string, or $value unchanged when no converter is available
     */
    public static function ConvertEncoding($value, $to, $from)
    {
        if (self::getIsMbstringEnabled()) {
            return mb_convert_encoding($value, $to, $from);
        }

        if (self::getIsIconvEnabled()) {
            // note the argument order: iconv takes (from, to, string)
            return iconv($from, $to, $value);
        }

        // else, no conversion
        return $value;
    }

    /**
     * Get character count. First try mbstring, then iconv, finally strlen
     *
     * The strlen() fallback counts bytes, not characters.
     *
     * @param string $value
     * @param string $enc Encoding
     * @return int Character count
     */
    public static function CountCharacters($value, $enc = 'UTF-8')
    {
        if (self::getIsMbstringEnabled()) {
            return mb_strlen($value, $enc);
        }

        if (self::getIsIconvEnabled()) {
            return iconv_strlen($value, $enc);
        }

        // else strlen
        return strlen($value);
    }

}
271: