The first regex I came up with is the following:
/([^-]+)(-\w+)/g
The first group ([^-]+)
grabs everything that is not a dash. We then follow that up with the actual capture group we want (-\w+)
. Then we add the flag g
to make the regular expression object keep track of the last place it looked. This means, each time we run regex.exec(search)
we get the next match, just as you see in regex101.
Note: The \w
for JavaScript is equivalent to [a-zA-Z0-9_]
. So, if you just want letters use this instead of \w
: [a-zA-Z]
Here is the code that implements this regular expression.
<p id = "input">
Sentence would go here
-foo
-bar
Another sentence would go here
-baz
-bat
</p>
<p id = "output">
</p>
<script>
/**
 * Reports whether the given text contains at least one word character
 * (anything matched by \w).
 *
 * @param {string} search - Text to inspect.
 * @returns {boolean} true when a word character is present, false otherwise.
 */
function check_for_word(search) {
  // search() yields the index of the first \w match, or -1 when there is none.
  var firstWordCharAt = search.search(/\w/);
  return firstWordCharAt !== -1;
}
/**
 * Collects the dash words found in `search` into groups of consecutive words.
 *
 * `regex` must expose two capture groups: group 1 is the text preceding a
 * dash word and group 2 is the dash word itself. The first match always opens
 * a group. Each following match is appended to the current group while its
 * group 1 contains no word character (as decided by `check_for_word`);
 * otherwise it opens a new group.
 *
 * The scan always starts at the beginning of `search`: `regex.lastIndex` is
 * reset before the first match, so a previously used regex can be passed in.
 *
 * @param {RegExp} regex - Global (or sticky) regular expression with two capture groups.
 * @param {string} search - Text to scan.
 * @returns {string[][]} One array of dash words per group, in order of appearance.
 * @throws {TypeError} If `regex` is neither global nor sticky; exec() would
 *     then return the same match forever and the scan could never finish.
 */
function capture(regex, search) {
  if (!regex.global && !regex.sticky) {
    throw new TypeError("capture() requires a regular expression with the g flag");
  }

  // Fetches the next match, stepping past an empty match so the scan always advances.
  function nextMatch() {
    var found = regex.exec(search);
    if (found && found[0] === "") {
      regex.lastIndex += 1;
    }
    return found;
  }

  var result = [];
  var gather;
  var match;

  // A reused regex may carry a stale position; start from the beginning.
  regex.lastIndex = 0;
  match = nextMatch();
  while (match) {
    gather = [match[2]];
    match = nextMatch();
    // Keep absorbing dash words until real text shows up between them.
    while (match && !check_for_word(match[1])) {
      gather.push(match[2]);
      match = nextMatch();
    }
    result.push(gather);
  }
  return result;
}
// Read the sample paragraph as plain text (no entity escaping or child markup)
// and group its dash words.
var output = capture(/([^-]+)(-\w+)/g, document.getElementById("input").textContent);
// Assign through textContent so the JSON is displayed literally instead of being parsed as HTML.
document.getElementById("output").textContent = JSON.stringify(output);
</script>
Using a slightly modified regular expression, you might get more of what you are looking for.
/[^-]+((?:-\w+[^-\w]*)+)/g
The extra bit of [^-\w]*
allows for there to be some sort of separation between each dash word. Then the non-capturing group (?:)
was added so that the +
can repeat it, matching one or more of the dash words. We also do not need the ()
around [^-]+
, because the data is no longer needed as you will see below. The first is more flexible as to what can break between dash words, but I find this one a lot cleaner.
/**
 * Collects the dash words found in `search`, one group per regex match.
 *
 * Capture group 1 of each match is expected to hold a run of dash words
 * (for example "-foo\n-bar\n"). That run is split on "-", every non-word
 * character is stripped from each piece, and each non-empty piece is
 * reported with its leading dash restored.
 *
 * The scan always starts at the beginning of `search`: `regex.lastIndex` is
 * reset before the first match, so a previously used regex can be passed in.
 *
 * @param {RegExp} regex - Global (or sticky) regular expression whose group 1 holds the dash-word run.
 * @param {string} search - Text to scan.
 * @returns {string[][]} One array of dash words per match, in order of appearance.
 * @throws {TypeError} If `regex` is neither global nor sticky; exec() would
 *     then return the same match forever and the scan could never finish.
 */
function capture(regex, search) {
  if (!regex.global && !regex.sticky) {
    throw new TypeError("capture() requires a regular expression with the g flag");
  }

  // Fetches the next match, stepping past an empty match so the scan always advances.
  function nextMatch() {
    var found = regex.exec(search);
    if (found && found[0] === "") {
      regex.lastIndex += 1;
    }
    return found;
  }

  var result = [];
  var gather;
  var pieces;
  var word;
  var i;
  var match;

  // A reused regex may carry a stale position; start from the beginning.
  regex.lastIndex = 0;
  match = nextMatch();
  while (match) {
    gather = [];
    pieces = match[1].split("-");
    // Index loop rather than for...in, which would also visit inherited enumerable properties.
    for (i = 0; i < pieces.length; i += 1) {
      // Drop separators and punctuation, keeping only the word itself.
      word = pieces[i].replace(/\W+/g, "");
      if (word.length > 0) {
        gather.push("-" + word);
      }
    }
    result.push(gather);
    match = nextMatch();
  }
  return result;
}
// Read the sample paragraph as plain text (no entity escaping or child markup)
// and group its dash words.
var output = capture(/[^-]+((?:-\w+[^-\w]*)+)/g, document.getElementById("input").textContent);
// Assign through textContent so the JSON is displayed literally instead of being parsed as HTML.
document.getElementById("output").textContent = JSON.stringify(output);
<p id = "input">
Sentence would go here
-foo
-bar
Another sentence would go here
-baz
-bat
My very own sentence!
-get
-all
-of
-these!
</p>
<p id = "output">
</p>