Strings

Quotes

// recap
let single = 'single-quoted';
let double = "double-quoted";
let backticks = `backticks`;

// backtick can put expression
/**
 * Adds two operands with the `+` operator.
 *
 * @param {number} a - First addend.
 * @param {number} b - Second addend.
 * @returns {number} The value of `a + b`.
 */
function sum(a, b) {
  const total = a + b;
  return total;
}

alert(`1 + 2 = ${sum(1, 2)}.`); // 1 + 2 = 3.

// span multiple lines
let guestList = `Guests:
 * John
 * Pete
 * Mary
`;

alert(guestList); // a list of guests, multiple lines

// error
let guestList = "Guests:  // Error: Unexpected token ILLEGAL
  * John";

Special characters

let guestList = "Guests:\n * John\n * Pete\n * Mary";
alert(guestList); // a multiline list of guests

alert( "Hello\nWorld" ); // two lines using a "newline symbol"
// two lines using a normal newline and backticks
alert( `Hello
World` );

Character

Description

\b

Backspace

\f

Form feed

\n

New line

\r

Carriage return

\t

Tab

\uNNNN

A Unicode character with the hex code NNNN; for instance, \u00A9 is the escape for the copyright symbol ©. It must be exactly 4 hex digits.

\u{NNNNNNNN}

Some rare characters are encoded with two 16-bit code units (a surrogate pair), taking 4 bytes. This long Unicode escape requires braces around the hex code.

alert( "\u00A9" ); // ©
alert( "\u{20331}" ); // 佫, a rare chinese hieroglyph (long unicode)
alert( "\u{1F60D}" ); // 😍, a smiling face symbol (another long unicode)

用 \ 使用特殊符號

alert( 'I\'m the Walrus!' ); // I'm the Walrus!
alert( `I'm the Walrus!` ); // I'm the Walrus!
alert( `The backslash: \\` ); // The backslash: \

String length

.length 是屬性而不是方法,因此不需要加 ();它返回字串的長度。

alert( `My\n`.length ); // 3

Accessing characters

取得特定位置的字元有 2 種方法:第 1 種是直接在字串後加上 [pos],第 2 種是使用 .charAt(pos) 方法。兩者的差異在於該位置沒有字元時,[pos] 返回 undefined,.charAt(pos) 返回空字串。

let str = `Hello`;

// the first character
alert( str[0] ); // H
alert( str.charAt(0) ); // H

// the last character
alert( str[str.length - 1] ); // o

// 不同之處
let str = `Hello`;

alert( str[1000] ); // undefined
alert( str.charAt(1000) ); // '' (an empty string)

// 字串迴圈
for (let char of "Hello") {
  alert(char); // H,e,l,l,o (char becomes "H", then "e", then "l" etc)
}

Strings are immutable

// Strings cannot be changed in place
let str = 'Hi';
str[0] = 'h'; // has no effect: throws a TypeError in strict mode, is silently ignored otherwise
alert( str[0] ); // still "H" if this line is reached: the character was not replaced

// The only option is to store a whole new string in the variable
let str = 'Hi';
str = 'h' + str[1];  // replace the string
alert( str ); // hi

Changing the case

alert( 'Interface'.toUpperCase() ); // INTERFACE
alert( 'Interface'.toLowerCase() ); // interface

// 改變單一字母大小寫
alert( 'Interface'[0].toLowerCase() ); // 'i'

Searching for a substring

str.indexOf(substr, pos) 從位置 pos 開始尋找子字串 substr,返回找到的位置;找不到時返回 -1。

let str = 'Widget with id';
alert( str.indexOf('Widget') ); // 0, because 'Widget' is found at the beginning
alert( str.indexOf('widget') ); // -1, not found, the search is case-sensitive
alert( str.indexOf("id") ); // 1, "id" is found at the position 1 (..idget with id)

let str = 'Widget with id';
alert( str.indexOf('id', 2) ) // 12

// use in loop
let str = 'As sly as a fox, as strong as an ox';
let target = 'as'; // the substring to look for
let pos = 0; // index at which the next search starts

// Report every occurrence of target; indexOf returns -1 once no further match exists.
while (true) {
  let foundPos = str.indexOf(target, pos);
  if (foundPos === -1) break; // strict comparison, no type coercion

  alert( `Found at ${foundPos}` );
  pos = foundPos + 1; // continue the search from the next position
}

// shorter
let str = "As sly as a fox, as strong as an ox";
let target = "as";

// Start at -1 so that the first search begins at index 0 (pos + 1).
let pos = -1;
// Store the next match index in pos and keep looping until indexOf reports -1.
while ((pos = str.indexOf(target, pos + 1)) !== -1) {
  alert( pos );
}

// The result of indexOf cannot be used directly as a condition
let str = "Widget with id";

// indexOf returns 0 here (the match is at the very start) and 0 is falsy, so the branch is skipped
if (str.indexOf("Widget")) {
    alert("We found it"); // doesn't work!
}

// Compare the result against -1 explicitly instead
let str = "Widget with id";

// A match at index 0 is still a match; only -1 means "not found".
if (str.indexOf("Widget") !== -1) {
    alert("We found it"); // works now!
}

str.lastIndexOf(substr, position)

'canal'.lastIndexOf('a');     // returns 3
'canal'.lastIndexOf('a', 2);  // returns 1
'canal'.lastIndexOf('a', 0);  // returns -1

The bitwise NOT trick

alert( ~2 ); // -3, the same as -(2+1)
alert( ~1 ); // -2, the same as -(1+1)
alert( ~0 ); // -1, the same as -(0+1)
alert( ~-1 ); // 0, the same as -(-1+1)

// Older code uses this trick as a condition
let str = "Widget";

// ~(-1) is 0 (falsy); a found index such as 0 gives a non-zero (truthy) result
if (~str.indexOf("Widget")) {
  alert( 'Found it!' ); // works
}

includes, startsWith, endsWith 檢查字串是否有子字串,返回布林值。

// includes
alert( "Widget with id".includes("Widget") ); // true
alert( "Hello".includes("Bye") ); // false
alert( "Midget".includes("id") ); // true
alert( "Midget".includes("id", 3) ); // false, from position 3 there is no "id"

// startsWith & endsWith
alert( "Widget".startsWith("Wid") ); // true, "Widget" starts with "Wid"
alert( "Widget".endsWith("get") );   // true, "Widget" ends with "get"

Getting a substring

  • str.slice(start [, end]) 返回開始點到結束點的部分字串

    let str = "stringify";
    alert( str.slice(0, 5) ); // 'strin', the substring from 0 to 5 (not including 5)
    alert( str.slice(0, 1) ); // 's', from 0 to 1, but not including 1, so only character at 0
    
    // 沒有第 2 個參數直接到結尾
    let str = "stringify";
    alert( str.slice(2) ); // ringify, from the 2nd position till the end
    
    // 位置為負數也可以
    let str = "stringify";
    // start at the 4th position from the right, end at the 1st from the right
    alert( str.slice(-4, -1) ); // gif
  • str.substring(start [, end]) 與 slice() 幾乎一樣,不同之處是開始點可以大於結束點(負數參數則視為 0)。

    let str = "stringify";
    
    // these are same for substring
    alert( str.substring(2, 6) ); // "ring"
    alert( str.substring(6, 2) ); // "ring"
    
    // ...but not for slice:
    alert( str.slice(2, 6) ); // "ring" (the same)
    alert( str.slice(6, 2) ); // "" (an empty string)
  • str.substr(start [, length]) 返回從開始點計算長度的字串

    let str = "stringify";
    alert( str.substr(2, 4) ); // ring, from the 2nd position get 4 characters
    
    let str = "stringify";
    alert( str.substr(-4, 2) ); // gi, from the 4th position from the end get 2 characters

method

selects…

negatives

slice(start, end)

from start to end (not including end)

allows negatives

substring(start, end)

between start and end

negative values mean 0

substr(start, length)

from start get length characters

allows negative start

Which one to choose?

substr(...) 並未定義在核心的 JavaScript 規範中(而是在 Annex B),它可以在瀏覽器環境執行,其他環境可能不支援。大多數情況都使用 slice(...)。

Comparing strings

// 小寫字母永遠大於大寫字母
alert( 'a' > 'Z' ); // true

// 帶特殊符號的字母大於正常字母
alert( 'Österreich' > 'Zealand' ); // true
  • str.codePointAt(pos) 返回字母的編碼位置

    // different case letters have different codes
    alert( "z".codePointAt(0) ); // 122
    alert( "Z".codePointAt(0) ); // 90
  • String.fromCodePoint(code) 返回編碼位置的字母

    alert( String.fromCodePoint(90) ); // Z
    
    let str = '';
    for (let i = 65; i <= 220; i++) {
      str += String.fromCodePoint(i);
    }
    alert( str );
    // ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„
    // ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜ

Correct comparisons

str.localeCompare(str2) 依照語言規則比較兩個字串的大小,所有現代瀏覽器都支援。

  • Returns a positive number (typically 1) if str is greater than str2 according to the language rules.

  • Returns a negative number (typically -1) if str is less than str2.

  • Returns 0 if they are equal.

alert( 'Österreich'.localeCompare('Zealand') ); // -1

Internals, Unicode

Surrogate pairs

alert( '𝒳'.length ); // 2, MATHEMATICAL SCRIPT CAPITAL X
alert( '😂'.length ); // 2, FACE WITH TEARS OF JOY
alert( '𙷶'.length ); // 2, a rare chinese hieroglyph

alert( '𝒳'[0] ); // strange symbols...
alert( '𝒳'[1] ); // ...pieces of the surrogate pair

// charCodeAt is not surrogate-pair aware, so it gives codes for parts

alert( '𝒳'.charCodeAt(0).toString(16) ); // d835, between 0xd800 and 0xdbff
alert( '𝒳'.charCodeAt(1).toString(16) ); // dcb3, between 0xdc00 and 0xdfff

Diacritical marks and normalization

alert( 'S\u0307' ); // Ṡ
alert( 'S\u0307\u0323' ); // Ṩ

// The same visible character compares as different when its combining marks are in a different order
alert( 'S\u0307\u0323' ); // Ṩ, S + dot above + dot below
alert( 'S\u0323\u0307' ); // Ṩ, S + dot below + dot above
alert( 'S\u0307\u0323' == 'S\u0323\u0307' ); // false

// normalize() turns the sequence of 3 characters into 1 character
alert( "S\u0307\u0323".normalize() == "S\u0323\u0307".normalize() ); // true
alert( "S\u0307\u0323".normalize().length ); // 1
alert( "S\u0307\u0323".normalize() == "\u1e68" ); // true

Last updated