You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

409 lines
10 KiB
JavaScript

5 years ago
/*!
* RegJSGen
* Copyright 2014 Benjamin Tan <https://d10.github.io/>
* Available under MIT license <http://d10.mit-license.org/>
*/
;(function() {
'use strict';
/** Used to determine if values are of the language type `Object` */
var objectTypes = {
'function': true,
'object': true
};
/** Used as a reference to the global object */
var root = (objectTypes[typeof window] && window) || this;
/** Backup possible global object */
var oldRoot = root;
/** Detect free variable `exports` */
var freeExports = objectTypes[typeof exports] && exports;
/** Detect free variable `module` */
var freeModule = objectTypes[typeof module] && module && !module.nodeType && module;
/** Detect free variable `global` from Node.js or Browserified code and use it as `root` */
var freeGlobal = freeExports && freeModule && typeof global == 'object' && global;
if (freeGlobal && (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal || freeGlobal.self === freeGlobal)) {
root = freeGlobal;
}
/*--------------------------------------------------------------------------*/
/*! Based on https://mths.be/fromcodepoint v0.2.0 by @mathias */
var stringFromCharCode = String.fromCharCode;
var floor = Math.floor;
function fromCodePoint() {
var MAX_SIZE = 0x4000;
var codeUnits = [];
var highSurrogate;
var lowSurrogate;
var index = -1;
var length = arguments.length;
if (!length) {
return '';
}
var result = '';
while (++index < length) {
var codePoint = Number(arguments[index]);
if (
!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
codePoint < 0 || // not a valid Unicode code point
codePoint > 0x10FFFF || // not a valid Unicode code point
floor(codePoint) != codePoint // not an integer
) {
throw RangeError('Invalid code point: ' + codePoint);
}
if (codePoint <= 0xFFFF) {
// BMP code point
codeUnits.push(codePoint);
} else {
// Astral code point; split in surrogate halves
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint -= 0x10000;
highSurrogate = (codePoint >> 10) + 0xD800;
lowSurrogate = (codePoint % 0x400) + 0xDC00;
codeUnits.push(highSurrogate, lowSurrogate);
}
if (index + 1 == length || codeUnits.length > MAX_SIZE) {
result += stringFromCharCode.apply(null, codeUnits);
codeUnits.length = 0;
}
}
return result;
}
function assertType(type, expected) {
if (expected.indexOf('|') == -1) {
if (type == expected) {
return;
}
throw Error('Invalid node type: ' + type);
}
expected = assertType.hasOwnProperty(expected)
? assertType[expected]
: (assertType[expected] = RegExp('^(?:' + expected + ')$'));
if (expected.test(type)) {
return;
}
throw Error('Invalid node type: ' + type);
}
/*--------------------------------------------------------------------------*/
function generate(node) {
var type = node.type;
if (generate.hasOwnProperty(type) && typeof generate[type] == 'function') {
return generate[type](node);
}
throw Error('Invalid node type: ' + type);
}
/*--------------------------------------------------------------------------*/
function generateAlternative(node) {
assertType(node.type, 'alternative');
var terms = node.body,
length = terms ? terms.length : 0;
if (length == 1) {
return generateTerm(terms[0]);
} else {
var i = -1,
result = '';
while (++i < length) {
result += generateTerm(terms[i]);
}
return result;
}
}
function generateAnchor(node) {
assertType(node.type, 'anchor');
switch (node.kind) {
case 'start':
return '^';
case 'end':
return '$';
case 'boundary':
return '\\b';
case 'not-boundary':
return '\\B';
default:
throw Error('Invalid assertion');
}
}
function generateAtom(node) {
assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|group|reference|value');
return generate(node);
}
function generateCharacterClass(node) {
assertType(node.type, 'characterClass');
var classRanges = node.body,
length = classRanges ? classRanges.length : 0;
var i = -1,
result = '[';
if (node.negative) {
result += '^';
}
while (++i < length) {
result += generateClassAtom(classRanges[i]);
}
result += ']';
return result;
}
function generateCharacterClassEscape(node) {
assertType(node.type, 'characterClassEscape');
return '\\' + node.value;
}
function generateCharacterClassRange(node) {
assertType(node.type, 'characterClassRange');
var min = node.min,
max = node.max;
if (min.type == 'characterClassRange' || max.type == 'characterClassRange') {
throw Error('Invalid character class range');
}
return generateClassAtom(min) + '-' + generateClassAtom(max);
}
function generateClassAtom(node) {
assertType(node.type, 'anchor|characterClassEscape|characterClassRange|dot|value');
return generate(node);
}
function generateDisjunction(node) {
assertType(node.type, 'disjunction');
var body = node.body,
length = body ? body.length : 0;
if (length == 0) {
throw Error('No body');
} else if (length == 1) {
return generate(body[0]);
} else {
var i = -1,
result = '';
while (++i < length) {
if (i != 0) {
result += '|';
}
result += generate(body[i]);
}
return result;
}
}
function generateDot(node) {
assertType(node.type, 'dot');
return '.';
}
function generateGroup(node) {
assertType(node.type, 'group');
var result = '(';
switch (node.behavior) {
case 'normal':
break;
case 'ignore':
result += '?:';
break;
case 'lookahead':
result += '?=';
break;
case 'negativeLookahead':
result += '?!';
break;
default:
throw Error('Invalid behaviour: ' + node.behaviour);
}
var body = node.body,
length = body ? body.length : 0;
if (length == 1) {
result += generate(body[0]);
} else {
var i = -1;
while (++i < length) {
result += generate(body[i]);
}
}
result += ')';
return result;
}
function generateQuantifier(node) {
assertType(node.type, 'quantifier');
var quantifier = '',
min = node.min,
max = node.max;
switch (max) {
case undefined:
case null:
switch (min) {
case 0:
quantifier = '*'
break;
case 1:
quantifier = '+';
break;
default:
quantifier = '{' + min + ',}';
break;
}
break;
default:
if (min == max) {
quantifier = '{' + min + '}';
}
else if (min == 0 && max == 1) {
quantifier = '?';
} else {
quantifier = '{' + min + ',' + max + '}';
}
break;
}
if (!node.greedy) {
quantifier += '?';
}
return generateAtom(node.body[0]) + quantifier;
}
function generateReference(node) {
assertType(node.type, 'reference');
return '\\' + node.matchIndex;
}
function generateTerm(node) {
assertType(node.type, 'anchor|characterClass|characterClassEscape|empty|group|quantifier|reference|value');
return generate(node);
}
function generateValue(node) {
assertType(node.type, 'value');
var kind = node.kind,
codePoint = node.codePoint;
switch (kind) {
case 'controlLetter':
return '\\c' + fromCodePoint(codePoint + 64);
case 'hexadecimalEscape':
return '\\x' + ('00' + codePoint.toString(16).toUpperCase()).slice(-2);
case 'identifier':
return '\\' + fromCodePoint(codePoint);
case 'null':
return '\\' + codePoint;
case 'octal':
return '\\' + codePoint.toString(8);
case 'singleEscape':
switch (codePoint) {
case 0x0008:
return '\\b';
case 0x009:
return '\\t';
case 0x00A:
return '\\n';
case 0x00B:
return '\\v';
case 0x00C:
return '\\f';
case 0x00D:
return '\\r';
default:
throw Error('Invalid codepoint: ' + codePoint);
}
case 'symbol':
return fromCodePoint(codePoint);
case 'unicodeEscape':
return '\\u' + ('0000' + codePoint.toString(16).toUpperCase()).slice(-4);
case 'unicodeCodePointEscape':
return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
default:
throw Error('Unsupported node kind: ' + kind);
}
}
/*--------------------------------------------------------------------------*/
generate.alternative = generateAlternative;
generate.anchor = generateAnchor;
generate.characterClass = generateCharacterClass;
generate.characterClassEscape = generateCharacterClassEscape;
generate.characterClassRange = generateCharacterClassRange;
generate.disjunction = generateDisjunction;
generate.dot = generateDot;
generate.group = generateGroup;
generate.quantifier = generateQuantifier;
generate.reference = generateReference;
generate.value = generateValue;
/*--------------------------------------------------------------------------*/
// export regjsgen
// some AMD build optimizers, like r.js, check for condition patterns like the following:
if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
// define as an anonymous module so, through path mapping, it can be aliased
define(function() {
return {
'generate': generate
};
});
}
// check for `exports` after `define` in case a build optimizer adds an `exports` object
else if (freeExports && freeModule) {
// in Narwhal, Node.js, Rhino -require, or RingoJS
freeExports.generate = generate;
}
// in a browser or Rhino
else {
root.regjsgen = {
'generate': generate
};
}
}.call(this));