借鉴 pdf.js 源码,实现文本搜索功能,支持大小写敏感和全字匹配两个选项。话不多说,直接上代码:
/**
 * Character classes used to decide where a "word" begins and ends.
 * Two adjacent characters of the same class are considered part of the
 * same word by isEntireWord().
 */
var CharacterType = {
  SPACE: 0,
  ALPHA_LETTER: 1,
  PUNCT: 2,
  HAN_LETTER: 3,
  KATAKANA_LETTER: 4,
  HIRAGANA_LETTER: 5,
  HALFWIDTH_KATAKANA_LETTER: 6,
  THAI_LETTER: 7
};

/** True for code units below 0x2E80, i.e. below the CJK ranges tested further down. */
function isAlphabeticalScript(charCode) {
  return charCode < 0x2E80;
}

/** True when no bit above the low seven is set (code unit < 0x80). */
function isAscii(charCode) {
  return (charCode & 0xFF80) === 0;
}

/** True for 'a'-'z' (0x61-0x7A) and 'A'-'Z' (0x41-0x5A). */
function isAsciiAlpha(charCode) {
  return (charCode >= 0x61 && charCode <= 0x7A) ||
    (charCode >= 0x41 && charCode <= 0x5A);
}

/** True for '0'-'9' (0x30-0x39). */
function isAsciiDigit(charCode) {
  return charCode >= 0x30 && charCode <= 0x39;
}

/** True for space, tab, carriage return and line feed. */
function isAsciiSpace(charCode) {
  return charCode === 0x20 || charCode === 0x09 ||
    charCode === 0x0D || charCode === 0x0A;
}

/** True for code units in 0x0E00-0x0E7F. */
function isThai(charCode) {
  return (charCode & 0xFF80) === 0x0E00;
}

/** True for code units in 0x3400-0x9FFF or 0xF900-0xFAFF. */
function isHan(charCode) {
  return (charCode >= 0x3400 && charCode <= 0x9FFF) ||
    (charCode >= 0xF900 && charCode <= 0xFAFF);
}

/** True for code units in 0x30A0-0x30FF. */
function isKatakana(charCode) {
  return charCode >= 0x30A0 && charCode <= 0x30FF;
}

/** True for code units in 0x3040-0x309F. */
function isHiragana(charCode) {
  return charCode >= 0x3040 && charCode <= 0x309F;
}

/** True for code units in 0xFF60-0xFF9F. */
function isHalfwidthKatakana(charCode) {
  return charCode >= 0xFF60 && charCode <= 0xFF9F;
}

/**
 * Classifies a UTF-16 code unit into one of the CharacterType values.
 * @param {Number} charCode code unit, as returned by String.prototype.charCodeAt
 * @return {Number} a CharacterType value
 */
function getCharacterType(charCode) {
  if (isAlphabeticalScript(charCode)) {
    if (isAscii(charCode)) {
      if (isAsciiSpace(charCode)) {
        return CharacterType.SPACE;
      }
      // Underscore (0x5F) is treated as a word character, like letters and digits.
      if (isAsciiAlpha(charCode) || isAsciiDigit(charCode) || charCode === 0x5F) {
        return CharacterType.ALPHA_LETTER;
      }
      return CharacterType.PUNCT;
    }
    if (isThai(charCode)) {
      return CharacterType.THAI_LETTER;
    }
    // 0xA0 (no-break space) counts as whitespace.
    if (charCode === 0xA0) {
      return CharacterType.SPACE;
    }
    return CharacterType.ALPHA_LETTER;
  }

  if (isHan(charCode)) {
    return CharacterType.HAN_LETTER;
  }
  if (isKatakana(charCode)) {
    return CharacterType.KATAKANA_LETTER;
  }
  if (isHiragana(charCode)) {
    return CharacterType.HIRAGANA_LETTER;
  }
  if (isHalfwidthKatakana(charCode)) {
    return CharacterType.HALFWIDTH_KATAKANA_LETTER;
  }
  // Anything not recognised above falls back to ALPHA_LETTER.
  return CharacterType.ALPHA_LETTER;
}

/**
 * Tells whether the match at [matchIdx, matchIdx + length) is a whole word,
 * i.e. the character just before the match has a different type than the
 * first matched character, and the character just after the match has a
 * different type than the last matched character.
 * @param {String} content text that contains the match
 * @param {Number} matchIdx index of the first matched character
 * @param {Number} length number of matched characters
 * @return {Boolean} true when the match is delimited on both sides
 */
function isEntireWord(content, matchIdx, length) {
  var startIdx = matchIdx;
  // A match at the very start of the text has no left neighbour to check.
  if (startIdx > 0) {
    var first = content.charCodeAt(startIdx);
    var before = content.charCodeAt(startIdx - 1);
    if (getCharacterType(first) === getCharacterType(before)) {
      return false;
    }
  }

  var endIdx = matchIdx + length - 1;
  // A match ending at the last character has no right neighbour to check.
  if (endIdx < content.length - 1) {
    var last = content.charCodeAt(endIdx);
    var after = content.charCodeAt(endIdx + 1);
    if (getCharacterType(last) === getCharacterType(after)) {
      return false;
    }
  }

  return true;
}

/**
 * Searches `content` for every non-overlapping occurrence of `query` and
 * returns the start index of each one.
 *
 * NOTE(review): when caseSensitive is false both strings are lowercased
 * first, so the returned indices refer to the lowercased content. For the
 * few characters whose lowercase form has a different length these may not
 * line up with the original text - confirm whether callers care.
 *
 * @param {String} query text to look for
 * @param {String} content text to search in
 * @param {Boolean} caseSensitive match case exactly when true
 * @param {Boolean} entireWord keep only whole-word matches when true
 * @return {[Number]} start indices of the matches (empty when there are
 *     none, or when the query is empty)
 */
function search(query, content, caseSensitive, entireWord) {
  var matchRst = [];
  var queryLen = query.length;

  // An empty query matches nothing; return an empty list so callers always
  // get an array back.
  if (queryLen === 0) {
    return matchRst;
  }

  if (!caseSensitive) {
    query = query.toLowerCase();
    content = content.toLowerCase();
    queryLen = query.length;
  }

  // Start at -queryLen so the first indexOf call begins at index 0.
  var matchIdx = -queryLen;
  while (true) {
    matchIdx = content.indexOf(query, matchIdx + queryLen);
    if (matchIdx === -1) {
      break;
    }
    if (entireWord && !isEntireWord(content, matchIdx, queryLen)) {
      continue;
    }
    matchRst.push(matchIdx);
  }
  return matchRst;
}
一个用于测试的例子
// Sample text used by the demo searches below.
var content = 'Say Hello To Tomorrow. Say Goodbye To Yesterday.';

// Case-sensitive, partial match: lowercase 'say' does not occur in the text.
var query = 'say';
var result = search(query, content, true, false);
console.log(result); // []

// Case-insensitive, partial match: both occurrences of 'Say' are found.
result = search(query, content, false, false);
console.log(result); // [0, 23]

// Case-sensitive, partial match: 'Good' is found inside 'Goodbye'.
query = 'Good';
result = search(query, content, true, false);
console.log(result); // [27]

// Case-sensitive, whole-word match: 'Good' is only a prefix of 'Goodbye'.
result = search(query, content, true, true);
console.log(result); // []