Skip to content

Commit

Permalink
Feat: Add support for Unicode boundaries (#5265)
Browse files Browse the repository at this point in the history
Add support for Unicode boundaries and detection methods

    Introduced Unicode boundaries support in text search
    Added supportsLookbehind and supportsUnicodeFlag methods in lang for feature detection
    Implemented fallback to ASCII boundaries when the browser does not support look-behinds
    Implemented fallback to old behaviour (without unicode support) in rare edge cases for backward compatibility
  • Loading branch information
mkslanc authored Aug 4, 2023
1 parent b196806 commit 1e6fcf3
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 28 deletions.
18 changes: 18 additions & 0 deletions src/lib/lang.js
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,21 @@ exports.delayedCall = function(fcn, defaultTimeout) {

return _self;
};

exports.supportsLookbehind = function () {
try {
new RegExp('(?<=.)');
} catch (e) {
return false;
}
return true;
};

exports.supportsUnicodeFlag = function () {
try {
new RegExp('^.$', 'u');
} catch (error) {
return false;
}
return true;
};
100 changes: 72 additions & 28 deletions src/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,34 @@ var Range = require("./range").Range;
class Search {
/**
* Creates a new `Search` object. The following search options are available:
*
* @typedef SearchOptions
*
* @property {string|RegExp} [needle] - The string or regular expression you're looking for
* @property {boolean} [backwards] - Whether to search backwards from where cursor currently is. Defaults to `false`.
* @property {boolean} [wrap] - Whether to wrap the search back to the beginning when it hits the end. Defaults to `false`.
* @property {boolean} [caseSensitive] - Whether the search ought to be case-sensitive. Defaults to `false`.
* @property {boolean} [wholeWord] - Whether the search matches only on whole words. Defaults to `false`.
* @property {Range|null} [range] - The [[Range]] to search within. Set this to `null` for the whole document
* @property {boolean} [regExp] - Whether the search is a regular expression or not. Defaults to `false`.
* @property {Range|Position} [start] - The starting [[Range]] or cursor position to begin the search
* @property {boolean} [skipCurrent] - Whether or not to include the current line in the search. Defaults to `false`.
* @property {boolean} [$isMultiLine] - true, if needle has \n or \r\n
* @property {boolean} [preserveCase]
* @property {boolean} [preventScroll]
* @property {boolean} [$supportsUnicodeFlag] - internal property, records whether the `u` (unicode) flag can be used; set to `false` when the needle does not compile with that flag
* @property {any} [re] - internal property, the regular expression assembled from the needle, or `false` when there is no needle
**/

constructor() {
/**
* Current search options; starts out empty.
* @type {SearchOptions}
*/
this.$options = {};
}

/**
* Sets the search options via the `options` parameter.
* @param {SearchOptions} options An object containing all the new search properties
* @returns {Search}
* @chainable
**/
Expand All @@ -41,27 +48,26 @@ class Search {

/**
* [Returns an object containing all the search options.]{: #Search.getOptions}
* @returns {SearchOptions} the result of passing the current options through `lang.copyObject`
**/
getOptions() {
return lang.copyObject(this.$options);
}

/**
* Sets the search options via the `options` parameter.
* The given object is stored as-is and replaces the previous options object.
* @param {SearchOptions} options object containing all the search properties
* @related Search.set
**/
setOptions(options) {
this.$options = options;
}

/**
* Searches for `options.needle`. If found, this method returns the [[Range `Range`]] where the text first occurs. If `options.backwards` is `true`, the search goes backwards in the session.
* @param {EditSession} session The session to search with
* @returns {Range|boolean}
**/
find(session) {
var options = this.$options;
var iterator = this.$matchIterator(session, options);
Expand All @@ -87,9 +93,7 @@ class Search {
/**
* Searches for all occurrences of `options.needle`. If found, this method returns an array of [[Range `Range`s]] where the text first occurs. If `options.backwards` is `true`, the search goes backwards in the session.
* @param {EditSession} session The session to search with
* @returns {Range[]}
**/
findAll(session) {
var options = this.$options;
Expand Down Expand Up @@ -200,15 +204,31 @@ class Search {
return replacement;
}

/**
*
* @param {SearchOptions} options
* @param $disableFakeMultiline
* @return {RegExp|boolean|*[]|*}
*/
$assembleRegExp(options, $disableFakeMultiline) {
if (options.needle instanceof RegExp)
return options.re = options.needle;

var needle = options.needle;

if (!options.needle)
return options.re = false;

if (options.$supportsUnicodeFlag === undefined) {
options.$supportsUnicodeFlag = lang.supportsUnicodeFlag();
}

try {
new RegExp(needle, "u");
} catch (e) {
options.$supportsUnicodeFlag = false; //left for backward compatibility with previous versions for cases like /ab\{2}/gu
}

if (!options.regExp)
needle = lang.escapeRegExp(needle);

Expand All @@ -217,6 +237,10 @@ class Search {

var modifier = options.caseSensitive ? "gm" : "gmi";

if (options.$supportsUnicodeFlag) {
modifier += "u";
}

options.$isMultiLine = !$disableFakeMultiline && /[\n\r]/.test(needle);
if (options.$isMultiLine)
return options.re = this.$assembleMultilineRegExp(needle, modifier);
Expand Down Expand Up @@ -356,13 +380,33 @@ class Search {

}

/**
 * Wraps `needle` in word-boundary assertions for a whole-word search.
 *
 * A boundary is added on a side of the needle only when the outermost character
 * on that side is a word character, or on both sides when `options.regExp` is
 * set. When look-behind assertions are available and `options.$supportsUnicodeFlag`
 * is truthy, Unicode-aware boundaries are used (`\p{L}`, `\p{N}` and `_` count as
 * word characters); otherwise the ASCII-only `\b` is used.
 *
 * @param {string} needle - pattern source to wrap
 * @param {SearchOptions} options - reads `regExp` and `$supportsUnicodeFlag`
 * @return {string} the needle with boundary assertions added where applicable
 */
function addWordBoundary(needle, options) {
    // Unicode boundaries are built from a look-behind plus \p{...} escapes,
    // so both engine features have to be available.
    const useUnicodeBoundary = Boolean(lang.supportsLookbehind() && options.$supportsUnicodeFlag);
    const wordRegExp = useUnicodeBoundary
        ? new RegExp("[\\p{L}\\p{N}_]", "u")
        : new RegExp("\\w");

    /**
     * Returns the boundary assertion for one edge of the needle.
     * @param {string} c - outermost character on that edge
     * @param {boolean} [firstChar] - `true` for the leading edge, `false` for the trailing edge
     * @return {string}
     */
    function wordBoundary(c, firstChar = true) {
        if (!wordRegExp.test(c) && !options.regExp) return "";
        if (!useUnicodeBoundary) return "\\b";
        return firstChar ? "(?<=^|[^\\p{L}\\p{N}_])" : "(?=[^\\p{L}\\p{N}_]|$)";
    }

    // Array.from iterates by code point, so a character outside the BMP is
    // inspected as a whole instead of as a lone surrogate.
    const needleArray = Array.from(needle);
    const firstChar = needleArray[0];
    const lastChar = needleArray[needleArray.length - 1];

    return wordBoundary(firstChar) + needle + wordBoundary(lastChar, false);
}

exports.Search = Search;
34 changes: 34 additions & 0 deletions src/search_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,40 @@ module.exports = {
assert.position(range.end, 1, 12);
},

"test: fallback to nonUnicode mode on edge cases": function() {
var session = new EditSession([
/* eslint-disable no-octal-escape*/
"string with \251 symbol", // test octal escape sequence
"bracket ab{2}" // test lone quantifier brackets
]);

var search = new Search().set({
needle: "\\251",
regExp: true
});
var range = search.find(session);
assert.position(range.start, 0, 12);
assert.position(range.end, 0, 13);

search.set({ needle: "ab\\{2}" });
range = search.find(session);
assert.position(range.start, 1, 8);
assert.position(range.end, 1, 13);
},

"test: whole word search should not match inside of words with unicode": function() {
var session = new EditSession(["𝓗ello𝓦orld", "𝓗ello 𝓦orld 123", "456"]);

var search = new Search().set({
needle: "𝓗ello",
wholeWord: true
});

var range = search.find(session);
assert.position(range.start, 1, 0);
assert.position(range.end, 1, 6);
},

"test: find backwards": function() {
var session = new EditSession(["juhu juhu juhu juhu"]);
session.getSelection().moveCursorTo(0, 10);
Expand Down

0 comments on commit 1e6fcf3

Please sign in to comment.