You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
409 lines
10 KiB
JavaScript
409 lines
10 KiB
JavaScript
5 years ago
|
/*!
|
||
|
* RegJSGen
|
||
|
* Copyright 2014 Benjamin Tan <https://d10.github.io/>
|
||
|
* Available under MIT license <http://d10.mit-license.org/>
|
||
|
*/
|
||
|
;(function() {
|
||
|
'use strict';
|
||
|
|
||
|
/** Used to determine if values are of the language type `Object` */
var objectTypes = {
  'function': true,
  'object': true
};

/**
 * Used as a reference to the global object: `window` when it is a non-null
 * object or function, otherwise the `this` binding of the enclosing scope.
 */
var root = (objectTypes[typeof window] && window) || this;

/** Detect free variable `exports` */
var freeExports = objectTypes[typeof exports] && exports;

/** Detect free variable `module` (rejected when it carries a truthy `nodeType`) */
var freeModule = objectTypes[typeof module] && module && !module.nodeType && module;

/** Detect free variable `global` from Node.js or Browserified code and use it as `root` */
var freeGlobal = freeExports && freeModule && typeof global == 'object' && global;

// Only trust `global` when it refers back to itself through one of the
// well-known self-referencing properties.
if (
  freeGlobal &&
  (
    freeGlobal.global === freeGlobal ||
    freeGlobal.window === freeGlobal ||
    freeGlobal.self === freeGlobal
  )
) {
  root = freeGlobal;
}
|
||
|
|
||
|
/*--------------------------------------------------------------------------*/
|
||
|
|
||
|
/*! Based on https://mths.be/fromcodepoint v0.2.0 by @mathias */

var stringFromCharCode = String.fromCharCode;
var floor = Math.floor;

/**
 * Builds a string from the code points passed as arguments.
 *
 * Each argument is converted with `Number()`. Values above 0xFFFF are split
 * into a high/low surrogate pair before being handed to
 * `String.fromCharCode`.
 *
 * @returns {string} The resulting string (`''` when called without arguments).
 * @throws {RangeError} When an argument is not a finite integer in the range
 *   0..0x10FFFF.
 */
function fromCodePoint() {
  // Code units are flushed in chunks so `apply` never receives an overly
  // long argument list.
  var FLUSH_THRESHOLD = 0x4000;
  var total = arguments.length;
  var lastPosition = total - 1;
  var output = '';
  var buffer = [];
  var position;
  var value;
  var offset;
  var isValid;

  for (position = 0; position < total; position++) {
    value = Number(arguments[position]);

    isValid = isFinite(value) && // rules out `NaN`, `+Infinity`, `-Infinity`
      value >= 0 &&
      value <= 0x10FFFF &&
      floor(value) == value; // must be an integer

    if (!isValid) {
      throw RangeError('Invalid code point: ' + value);
    }

    if (value > 0xFFFF) {
      // Astral code point; split in surrogate halves
      // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
      offset = value - 0x10000;
      buffer.push((offset >> 10) + 0xD800, (offset % 0x400) + 0xDC00);
    } else {
      // BMP code point
      buffer.push(value);
    }

    if (position == lastPosition || buffer.length > FLUSH_THRESHOLD) {
      output += stringFromCharCode.apply(null, buffer);
      buffer.length = 0;
    }
  }

  return output;
}
|
||
|
|
||
|
/**
 * Verifies that a node type is one of the expected types.
 *
 * `expected` is either a single type name or several names separated by `|`.
 * For the multi-name form an anchored regular expression is built once and
 * cached as a property of `assertType`, keyed by the `expected` string.
 *
 * @param {string} type The node type to check.
 * @param {string} expected A type name, or `|`-separated type names.
 * @throws {Error} When `type` is not among the expected types.
 */
function assertType(type, expected) {
  var isAllowed;

  if (expected.indexOf('|') == -1) {
    // Single expected type: plain comparison is enough.
    isAllowed = type == expected;
  } else {
    if (!assertType.hasOwnProperty(expected)) {
      assertType[expected] = RegExp('^(?:' + expected + ')$');
    }
    isAllowed = assertType[expected].test(type);
  }

  if (!isAllowed) {
    throw Error('Invalid node type: ' + type);
  }
}
|
||
|
|
||
|
/*--------------------------------------------------------------------------*/
|
||
|
|
||
|
/**
 * Dispatches a node to the generator registered for its type.
 *
 * Generators are looked up as own properties of `generate` itself, keyed by
 * `node.type`; only own properties holding functions are accepted.
 *
 * @param {Object} node The node to generate output for.
 * @returns {*} Whatever the registered generator returns for `node`.
 * @throws {Error} When no generator function is registered for `node.type`.
 */
function generate(node) {
  var type = node.type;
  var isRegistered = generate.hasOwnProperty(type) &&
    typeof generate[type] == 'function';

  if (!isRegistered) {
    throw Error('Invalid node type: ' + type);
  }

  return generate[type](node);
}
|
||
|
|
||
|
/*--------------------------------------------------------------------------*/
|
||
|
|
||
|
/**
 * Generates the source for an `alternative` node by generating each term in
 * `node.body` in order and concatenating the results.
 *
 * @param {Object} node A node whose `type` is `'alternative'`.
 * @returns {string} The concatenated term output (`''` for a missing or
 *   empty body).
 * @throws {Error} When `node.type` is not `'alternative'`.
 */
function generateAlternative(node) {
  assertType(node.type, 'alternative');

  var terms = node.body;
  var count = terms ? terms.length : 0;

  // A lone term is returned as-is, without going through concatenation.
  if (count == 1) {
    return generateTerm(terms[0]);
  }

  var output = '';
  for (var position = 0; position < count; position++) {
    output += generateTerm(terms[position]);
  }

  return output;
}
|
||
|
|
||
|
/**
 * Generates the source for an `anchor` node based on `node.kind`:
 * `'start'` gives `^`, `'end'` gives `$`, `'boundary'` gives `\b` and
 * `'not-boundary'` gives `\B`.
 *
 * @param {Object} node A node whose `type` is `'anchor'`.
 * @returns {string} The anchor source text.
 * @throws {Error} When `node.type` is not `'anchor'` or `node.kind` is not
 *   one of the kinds listed above.
 */
function generateAnchor(node) {
  assertType(node.type, 'anchor');

  var kind = node.kind;

  if (kind === 'start') {
    return '^';
  }
  if (kind === 'end') {
    return '$';
  }
  if (kind === 'boundary') {
    return '\\b';
  }
  if (kind === 'not-boundary') {
    return '\\B';
  }

  throw Error('Invalid assertion');
}
|
||
|
|
||
|
/**
 * Generates the source for a node that is allowed in atom position, after
 * checking its type against the list of permitted atom types.
 *
 * @param {Object} node The node to generate.
 * @returns {*} The result of `generate(node)`.
 * @throws {Error} When `node.type` is not a permitted atom type.
 */
function generateAtom(node) {
  var permittedTypes = 'anchor|characterClass|characterClassEscape|dot|group|reference|value';

  assertType(node.type, permittedTypes);

  return generate(node);
}
|
||
|
|
||
|
/**
 * Generates the source for a `characterClass` node: `[`, an optional `^`
 * when `node.negative` is truthy, every entry of `node.body` generated as a
 * class atom, then `]`.
 *
 * @param {Object} node A node whose `type` is `'characterClass'`.
 * @returns {string} The bracketed character class source.
 * @throws {Error} When `node.type` is not `'characterClass'`.
 */
function generateCharacterClass(node) {
  assertType(node.type, 'characterClass');

  var members = node.body;
  var count = members ? members.length : 0;
  var output = node.negative ? '[^' : '[';

  for (var position = 0; position < count; position++) {
    output += generateClassAtom(members[position]);
  }

  return output + ']';
}
|
||
|
|
||
|
/**
 * Generates the source for a `characterClassEscape` node: a backslash
 * followed by `node.value`.
 *
 * @param {Object} node A node whose `type` is `'characterClassEscape'`.
 * @returns {string} The escape source text.
 * @throws {Error} When `node.type` is not `'characterClassEscape'`.
 */
function generateCharacterClassEscape(node) {
  assertType(node.type, 'characterClassEscape');

  var escapeName = node.value;

  return '\\' + escapeName;
}
|
||
|
|
||
|
/**
 * Generates the source for a `characterClassRange` node as
 * `<min>-<max>`, where both ends are generated as class atoms.
 *
 * @param {Object} node A node whose `type` is `'characterClassRange'`, with
 *   `min` and `max` child nodes.
 * @returns {string} The range source text.
 * @throws {Error} When `node.type` is not `'characterClassRange'`, or when
 *   either end is itself a `characterClassRange`.
 */
function generateCharacterClassRange(node) {
  assertType(node.type, 'characterClassRange');

  var lower = node.min;
  var upper = node.max;

  // Ranges cannot be nested inside one another.
  var hasNestedRange = lower.type == 'characterClassRange' ||
    upper.type == 'characterClassRange';

  if (hasNestedRange) {
    throw Error('Invalid character class range');
  }

  return generateClassAtom(lower) + '-' + generateClassAtom(upper);
}
|
||
|
|
||
|
/**
 * Generates the source for a node that is allowed inside a character class,
 * after checking its type against the list of permitted class-atom types.
 *
 * @param {Object} node The node to generate.
 * @returns {*} The result of `generate(node)`.
 * @throws {Error} When `node.type` is not a permitted class-atom type.
 */
function generateClassAtom(node) {
  var permittedTypes = 'anchor|characterClassEscape|characterClassRange|dot|value';

  assertType(node.type, permittedTypes);

  return generate(node);
}
|
||
|
|
||
|
/**
 * Generates the source for a `disjunction` node by generating every entry of
 * `node.body` and separating the results with `|`.
 *
 * @param {Object} node A node whose `type` is `'disjunction'`.
 * @returns {*} The generated option when there is exactly one, otherwise the
 *   `|`-joined string of all options.
 * @throws {Error} When `node.type` is not `'disjunction'`, or when the body
 *   is missing or empty.
 */
function generateDisjunction(node) {
  assertType(node.type, 'disjunction');

  var options = node.body;
  var count = options ? options.length : 0;

  if (count == 0) {
    throw Error('No body');
  }

  // A lone option is passed through without any separator handling.
  if (count == 1) {
    return generate(options[0]);
  }

  var output = '';
  for (var position = 0; position < count; position++) {
    output += (position != 0 ? '|' : '') + generate(options[position]);
  }

  return output;
}
|
||
|
|
||
|
/**
 * Generates the source for a `dot` node, which is always `.`.
 *
 * @param {Object} node A node whose `type` is `'dot'`.
 * @returns {string} The string `'.'`.
 * @throws {Error} When `node.type` is not `'dot'`.
 */
function generateDot(node) {
  var DOT_SOURCE = '.';

  assertType(node.type, 'dot');

  return DOT_SOURCE;
}
|
||
|
|
||
|
/**
 * Generates the source for a `group` node: an opening parenthesis, a prefix
 * selected by `node.behavior`, every entry of `node.body` generated in
 * order, and a closing parenthesis.
 *
 * Prefix by behavior: `'normal'` adds nothing, `'ignore'` adds `?:`,
 * `'lookahead'` adds `?=` and `'negativeLookahead'` adds `?!`.
 *
 * @param {Object} node A node whose `type` is `'group'`.
 * @returns {string} The parenthesized group source.
 * @throws {Error} When `node.type` is not `'group'` or `node.behavior` is
 *   not one of the behaviors listed above.
 */
function generateGroup(node) {
  assertType(node.type, 'group');

  var result = '(';

  switch (node.behavior) {
    case 'normal':
      break;
    case 'ignore':
      result += '?:';
      break;
    case 'lookahead':
      result += '?=';
      break;
    case 'negativeLookahead':
      result += '?!';
      break;
    default:
      // Report the property that was actually inspected (`behavior`), so the
      // message shows the offending value instead of `undefined`.
      throw Error('Invalid behaviour: ' + node.behavior);
  }

  var body = node.body,
      length = body ? body.length : 0,
      i = -1;

  // `result` is already a string, so one loop covers the single-element
  // case as well.
  while (++i < length) {
    result += generate(body[i]);
  }

  result += ')';

  return result;
}
|
||
|
|
||
|
/**
 * Generates the source for a `quantifier` node: the first entry of
 * `node.body` generated as an atom, followed by the quantifier suffix.
 *
 * Suffix selection:
 * - `max` is `undefined` or `null`: `*` for `min` 0, `+` for `min` 1,
 *   otherwise `{min,}`.
 * - `min` equals `max`: `{min}`.
 * - `min` is 0 and `max` is 1: `?`.
 * - anything else: `{min,max}`.
 * A trailing `?` is appended when `node.greedy` is falsy.
 *
 * @param {Object} node A node whose `type` is `'quantifier'`.
 * @returns {string} The quantified atom source.
 * @throws {Error} When `node.type` is not `'quantifier'`.
 */
function generateQuantifier(node) {
  assertType(node.type, 'quantifier');

  var min = node.min;
  var max = node.max;
  var suffix;

  if (max === undefined || max === null) {
    // No upper bound.
    if (min === 0) {
      suffix = '*';
    } else if (min === 1) {
      suffix = '+';
    } else {
      suffix = '{' + min + ',}';
    }
  } else if (min == max) {
    suffix = '{' + min + '}';
  } else if (min == 0 && max == 1) {
    suffix = '?';
  } else {
    suffix = '{' + min + ',' + max + '}';
  }

  if (!node.greedy) {
    suffix += '?';
  }

  return generateAtom(node.body[0]) + suffix;
}
|
||
|
|
||
|
/**
 * Generates the source for a `reference` node: a backslash followed by
 * `node.matchIndex`.
 *
 * @param {Object} node A node whose `type` is `'reference'`.
 * @returns {string} The reference source text.
 * @throws {Error} When `node.type` is not `'reference'`.
 */
function generateReference(node) {
  assertType(node.type, 'reference');

  var index = node.matchIndex;

  return '\\' + index;
}
|
||
|
|
||
|
/**
 * Generates the source for a node that is allowed in term position, after
 * checking its type against the list of permitted term types.
 *
 * `dot` is part of the permitted list so that an alternative containing `.`
 * can be generated; a `dot` generator is registered on `generate`.
 *
 * NOTE(review): `empty` is accepted here, but no `empty` generator is
 * registered on `generate` in this file, so such a node still fails inside
 * `generate` — confirm whether an `empty` generator is intended.
 *
 * @param {Object} node The node to generate.
 * @returns {*} The result of `generate(node)`.
 * @throws {Error} When `node.type` is not a permitted term type.
 */
function generateTerm(node) {
  assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|empty|group|quantifier|reference|value');

  return generate(node);
}
|
||
|
|
||
|
/**
 * Generates the source for a `value` node from `node.kind` and
 * `node.codePoint`.
 *
 * Output by kind:
 * - `controlLetter`: `\c` plus the character at `codePoint + 64`
 * - `hexadecimalEscape`: `\x` plus two uppercase hex digits
 * - `identifier`: `\` plus the character itself
 * - `null`: `\` plus `codePoint`
 * - `octal`: `\` plus `codePoint` in base 8
 * - `singleEscape`: `\b`, `\t`, `\n`, `\v`, `\f` or `\r` for code points
 *   0x08 through 0x0D
 * - `symbol`: the character itself
 * - `unicodeEscape`: `\u` plus four uppercase hex digits
 * - `unicodeCodePointEscape`: `\u{` plus uppercase hex digits plus `}`
 *
 * @param {Object} node A node whose `type` is `'value'`.
 * @returns {string} The value source text.
 * @throws {Error} When `node.type` is not `'value'`, when `node.kind` is not
 *   listed above, or when a `singleEscape` code point is outside 0x08-0x0D.
 */
function generateValue(node) {
  assertType(node.type, 'value');

  var kind = node.kind;
  var codePoint = node.codePoint;
  var hexDigits;

  if (kind === 'controlLetter') {
    return '\\c' + fromCodePoint(codePoint + 64);
  }

  if (kind === 'hexadecimalEscape') {
    hexDigits = codePoint.toString(16).toUpperCase();
    return '\\x' + ('00' + hexDigits).slice(-2);
  }

  if (kind === 'identifier') {
    return '\\' + fromCodePoint(codePoint);
  }

  if (kind === 'null') {
    return '\\' + codePoint;
  }

  if (kind === 'octal') {
    return '\\' + codePoint.toString(8);
  }

  if (kind === 'singleEscape') {
    if (codePoint === 0x0008) {
      return '\\b';
    }
    if (codePoint === 0x009) {
      return '\\t';
    }
    if (codePoint === 0x00A) {
      return '\\n';
    }
    if (codePoint === 0x00B) {
      return '\\v';
    }
    if (codePoint === 0x00C) {
      return '\\f';
    }
    if (codePoint === 0x00D) {
      return '\\r';
    }
    throw Error('Invalid codepoint: ' + codePoint);
  }

  if (kind === 'symbol') {
    return fromCodePoint(codePoint);
  }

  if (kind === 'unicodeEscape') {
    hexDigits = codePoint.toString(16).toUpperCase();
    return '\\u' + ('0000' + hexDigits).slice(-4);
  }

  if (kind === 'unicodeCodePointEscape') {
    hexDigits = codePoint.toString(16).toUpperCase();
    return '\\u{' + hexDigits + '}';
  }

  throw Error('Unsupported node kind: ' + kind);
}
|
||
|
|
||
|
/*--------------------------------------------------------------------------*/
|
||
|
|
||
|
// Register each generator as an own property of `generate`, keyed by the
// node type it handles; `generate` dispatches through these properties.
var nodeGenerators = {
  'alternative': generateAlternative,
  'anchor': generateAnchor,
  'characterClass': generateCharacterClass,
  'characterClassEscape': generateCharacterClassEscape,
  'characterClassRange': generateCharacterClassRange,
  'disjunction': generateDisjunction,
  'dot': generateDot,
  'group': generateGroup,
  'quantifier': generateQuantifier,
  'reference': generateReference,
  'value': generateValue
};

for (var nodeType in nodeGenerators) {
  if (nodeGenerators.hasOwnProperty(nodeType)) {
    generate[nodeType] = nodeGenerators[nodeType];
  }
}
|
||
|
|
||
|
/*--------------------------------------------------------------------------*/
|
||
|
|
||
|
// export regjsgen

/**
 * Creates the public API object, exposing `generate` under the key
 * `generate`.
 *
 * @returns {Object} A fresh `{ generate }` object.
 */
function createApi() {
  return {
    'generate': generate
  };
}

// some AMD build optimizers, like r.js, check for condition patterns like the following:
if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
  // define as an anonymous module so, through path mapping, it can be aliased
  define(createApi);
}
// check for `exports` after `define` in case a build optimizer adds an `exports` object
else if (freeExports && freeModule) {
  // in Narwhal, Node.js, Rhino -require, or RingoJS
  freeExports.generate = generate;
}
// in a browser or Rhino
else {
  root.regjsgen = createApi();
}
|
||
|
}.call(this));
|