Source: lib/util/string_utils.js

  1. /**
  2. * @license
  3. * Copyright 2016 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. goog.provide('shaka.util.StringUtils');
  18. goog.require('shaka.util.Error');
  19. /**
  20. * @namespace shaka.util.StringUtils
  21. * @summary A set of string utility functions.
  22. */
  /**
   * Creates a string from the given buffer, interpreting its bytes as UTF-8.
   *
   * @param {?BufferSource} data The UTF-8 bytes, as an ArrayBuffer or as any
   *   ArrayBufferView (typed array or DataView).  A null value yields ''.
   * @return {string}
   * @throws {shaka.util.Error} With category TEXT and code BAD_ENCODING if the
   *   bytes cannot be decoded.
   */
  shaka.util.StringUtils.fromUTF8 = function(data) {
    if (!data) return '';

    // Always operate on the raw bytes.  Handing a view directly to the
    // Uint8Array constructor copies its *elements* (truncating anything wider
    // than 8 bits) and produces an empty array for a DataView, so wrap the
    // view's underlying buffer region instead.
    /** @type {!Uint8Array} */
    var bytes;
    if (ArrayBuffer.isView(data)) {
      var view = /** @type {!ArrayBufferView} */ (data);
      bytes = new Uint8Array(view.buffer, view.byteOffset, view.byteLength);
    } else {
      bytes = new Uint8Array(/** @type {!ArrayBuffer} */ (data));
    }

    // http://stackoverflow.com/a/13691499
    // Build a "binary string" in which every character holds one byte.
    var utf8 = shaka.util.StringUtils.fromCharCode_(bytes);

    // escape() leaves ASCII letters, digits and @*_+-./ untouched and turns
    // every other character below 0x100 into a %XX escape sequence.
    // Example: '\x67\x23\xe2\x82\xac' -> 'g%23%E2%82%AC'
    var escaped = escape(utf8);

    // Decode the escaped sequence.  This interprets runs of %XX escapes as
    // UTF-8 and produces the corresponding characters.
    // Example: 'g%23%E2%82%AC' -> 'g#€'
    try {
      return decodeURIComponent(escaped);
    } catch (e) {
      // decodeURIComponent throws when the escapes are not valid UTF-8.
      throw new shaka.util.Error(
          shaka.util.Error.Category.TEXT, shaka.util.Error.Code.BAD_ENCODING);
    }
  };
  /**
   * Creates a string from the given buffer, interpreting its bytes as UTF-16.
   *
   * @param {?BufferSource} data The UTF-16 bytes, as an ArrayBuffer or as any
   *   ArrayBufferView (typed array or DataView).  A null value yields ''.
   * @param {boolean} littleEndian true to read little endian, false to read big.
   * @return {string}
   * @throws {shaka.util.Error} With category TEXT and code BAD_ENCODING if the
   *   data has an odd number of bytes.
   */
  shaka.util.StringUtils.fromUTF16 = function(data, littleEndian) {
    if (!data) return '';

    if (data.byteLength % 2 !== 0) {
      shaka.log.error('Data has an incorrect length, must be even.');
      throw new shaka.util.Error(
          shaka.util.Error.Category.TEXT, shaka.util.Error.Code.BAD_ENCODING);
    }

    // Use a DataView to read with an explicit endianness.  When the argument
    // is a view (possibly a window onto a larger ArrayBuffer), point the
    // DataView at exactly the same byte range.  This avoids copying, and it
    // reads the real bytes even when the view's elements are wider than 8 bits.
    /** @type {!DataView} */
    var dataView;
    if (ArrayBuffer.isView(data)) {
      var view = /** @type {!ArrayBufferView} */ (data);
      dataView = new DataView(view.buffer, view.byteOffset, view.byteLength);
    } else {
      dataView = new DataView(/** @type {!ArrayBuffer} */ (data));
    }

    // Each UTF-16 code unit occupies two bytes.
    var length = data.byteLength / 2;
    var arr = new Uint16Array(length);
    for (var i = 0; i < length; i++) {
      arr[i] = dataView.getUint16(i * 2, littleEndian);
    }
    return shaka.util.StringUtils.fromCharCode_(arr);
  };
  /**
   * Creates a string from the given buffer, auto-detecting the encoding that is
   * being used.  A leading byte-order-mark selects UTF-8, UTF-16 big endian or
   * UTF-16 little endian (the mark itself is not included in the result).
   * Without a mark, the first four bytes are inspected to make a guess.  If the
   * encoding cannot be detected, an exception is thrown.
   *
   * @param {?BufferSource} data The encoded bytes, as an ArrayBuffer or as any
   *   ArrayBufferView (typed array or DataView).  A null value yields ''.
   * @return {string}
   * @throws {shaka.util.Error} With category TEXT and code
   *   UNABLE_TO_DETECT_ENCODING if no encoding could be determined, or with
   *   whatever error the selected decoder throws.
   */
  shaka.util.StringUtils.fromBytesAutoDetect = function(data) {
    var StringUtils = shaka.util.StringUtils;

    if (!data) return '';

    // Inspect the raw bytes.  Handing a view directly to the Uint8Array
    // constructor would copy its *elements* rather than its bytes, so wrap the
    // view's underlying buffer region instead.
    /** @type {!Uint8Array} */
    var uint8;
    if (ArrayBuffer.isView(data)) {
      var view = /** @type {!ArrayBufferView} */ (data);
      uint8 = new Uint8Array(view.buffer, view.byteOffset, view.byteLength);
    } else {
      uint8 = new Uint8Array(/** @type {!ArrayBuffer} */ (data));
    }

    // Byte-order-mark checks.  Reading past the end yields undefined, which
    // never matches, so short inputs simply fall through.
    if (uint8[0] === 0xef && uint8[1] === 0xbb && uint8[2] === 0xbf) {
      return StringUtils.fromUTF8(uint8.subarray(3));
    } else if (uint8[0] === 0xfe && uint8[1] === 0xff) {
      return StringUtils.fromUTF16(uint8.subarray(2), false /* littleEndian */);
    } else if (uint8[0] === 0xff && uint8[1] === 0xfe) {
      return StringUtils.fromUTF16(uint8.subarray(2), true /* littleEndian */);
    }

    // True if the byte at index i is printable ASCII (' ' through '~'), or if
    // i is past the end of the data.
    var isAscii = function(i) {
      return uint8.byteLength <= i || (uint8[i] >= 0x20 && uint8[i] <= 0x7e);
    };

    shaka.log.debug('Unable to find byte-order-mark, making an educated guess.');
    if (uint8[0] === 0 && uint8[2] === 0) {
      // Zero bytes at even offsets: guess UTF-16 with the high byte first.
      return StringUtils.fromUTF16(uint8, false /* littleEndian */);
    } else if (uint8[1] === 0 && uint8[3] === 0) {
      // Zero bytes at odd offsets: guess UTF-16 with the low byte first.
      return StringUtils.fromUTF16(uint8, true /* littleEndian */);
    } else if (isAscii(0) && isAscii(1) && isAscii(2) && isAscii(3)) {
      return StringUtils.fromUTF8(uint8);
    }

    throw new shaka.util.Error(
        shaka.util.Error.Category.TEXT,
        shaka.util.Error.Code.UNABLE_TO_DETECT_ENCODING);
  };
  /**
   * Creates an ArrayBuffer holding the UTF-8 encoding of the given string.
   *
   * NOTE: encodeURIComponent throws a URIError for strings containing a lone
   * surrogate; that error is not caught here.
   *
   * @param {string} str
   * @return {!ArrayBuffer}
   */
  shaka.util.StringUtils.toUTF8 = function(str) {
    // http://stackoverflow.com/a/13691499
    // Step 1: encodeURIComponent() writes every character it does not leave
    // as-is as the %XX escapes of that character's UTF-8 bytes.
    // Example: 'g#€' -> 'g%23%E2%82%AC'
    // Step 2: unescape() turns each %XX escape into the single character with
    // that code, so every character of the result stands for exactly one byte.
    // Example: 'g%23%E2%82%AC' -> '\x67\x23\xe2\x82\xac'
    var byteString = unescape(encodeURIComponent(str));

    // Copy the per-character byte values into a typed array.
    var byteCount = byteString.length;
    var bytes = new Uint8Array(byteCount);
    for (var pos = 0; pos < byteCount; pos++) {
      bytes[pos] = byteString.charCodeAt(pos);
    }
    return bytes.buffer;
  };
  /**
   * Builds a string whose UTF-16 code units are the elements of the given
   * typed array, in order.
   *
   * @param {!ITypedArray} args The char codes to convert.
   * @return {string}
   * @private
   */
  shaka.util.StringUtils.fromCharCode_ = function(args) {
    // Convert at most this many codes per String.fromCharCode call --
    // presumably to stay within engine limits on how many arguments apply()
    // can pass; confirm before changing the value.
    var CHUNK_SIZE = 16000;

    var pieces = [];
    for (var start = 0; start < args.length; start += CHUNK_SIZE) {
      // subarray() clamps the end index, so the last chunk may be shorter.
      var chunk = args.subarray(start, start + CHUNK_SIZE);
      pieces.push(String.fromCharCode.apply(null, chunk));
    }
    return pieces.join('');
  };