Here is a function that is, in theory, able to convert words between noun/verb/adjective/adverb forms. I updated it from here (originally written by bogs, I believe) to be compliant with nltk 3.2.5, now that `synset.lemmas` and `synset.name` are functions.
from nltk.corpus import wordnet as wn
# Named WordNet part-of-speech tags, so callers need not pass bare letters.
WN_NOUN = "n"
WN_VERB = "v"
WN_ADJECTIVE = "a"
WN_ADJECTIVE_SATELLITE = "s"  # convert() treats this like WN_ADJECTIVE
WN_ADVERB = "r"
def _pos_matches(synset_pos, wanted_pos):
    """Return True when *synset_pos* satisfies the requested *wanted_pos*.

    Plain adjectives ('a') and satellite adjectives ('s') are considered
    equivalent; every other tag must match exactly.
    """
    if synset_pos == wanted_pos:
        return True
    adjective_tags = (WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE)
    return wanted_pos in adjective_tags and synset_pos in adjective_tags


def convert(word, from_pos, to_pos):
    """Transform a word from one part of speech to another using WordNet.

    Only WordNet's "derivationally related forms" links are followed, so a
    conversion for which WordNet records no such link yields an empty list.

    Args:
        word: The word to convert.
        from_pos: WordNet POS tag of ``word`` ('n', 'v', 'a', 's' or 'r').
        to_pos: WordNet POS tag of the desired result.

    Returns:
        A list of ``(candidate, probability)`` tuples sorted by descending
        probability, ties broken alphabetically so the order is the same on
        every run. ``probability`` is the share of all matching derivations
        that produced ``candidate``. The list is empty when the word is not
        found or nothing related has the requested POS.
    """
    synsets = wn.synsets(word, pos=from_pos)

    # Word not found
    if not synsets:
        return []

    # Get all lemmas of the word (consider 'a' and 's' equivalent).
    # The POS belongs to the synset, so it is checked once per synset rather
    # than once per lemma.
    lemmas = []
    for synset in synsets:
        if _pos_matches(synset.pos(), from_pos):
            lemmas.extend(synset.lemmas())

    # Follow the derivational links and keep only the words whose synset has
    # the desired POS (again treating 'a' and 's' as equivalent).
    related_words = [
        related.name()
        for lemma in lemmas
        for related in lemma.derivationally_related_forms()
        if _pos_matches(related.synset().pos(), to_pos)
    ]
    total = len(related_words)

    # Count each candidate in a single pass.
    counts = {}
    for related_word in related_words:
        counts[related_word] = counts.get(related_word, 0) + 1

    # Build the result as (word, probability) tuples. `counts` is empty when
    # `total` is 0, so no division by zero can occur.
    result = [
        (candidate, float(count) / total)
        for candidate, count in counts.items()
    ]

    # Most probable first; alphabetical among equals for a stable order.
    result.sort(key=lambda pair: (-pair[1], pair[0]))
    return result
# Example conversions, spelled with the named POS constants.
convert('direct', WN_ADJECTIVE, WN_ADVERB)
convert('direct', WN_ADJECTIVE, WN_NOUN)
convert('quick', WN_ADJECTIVE, WN_ADVERB)
convert('quickly', WN_ADVERB, WN_ADJECTIVE)
convert('hunger', WN_NOUN, WN_VERB)
convert('run', WN_VERB, WN_ADJECTIVE)
convert('tired', WN_ADJECTIVE, WN_ADVERB)
convert('tired', WN_ADJECTIVE, WN_VERB)
convert('tired', WN_ADJECTIVE, WN_NOUN)
convert('tired', WN_ADJECTIVE, WN_ADJECTIVE_SATELLITE)
convert('wonder', WN_VERB, WN_NOUN)
convert('wonder', WN_NOUN, WN_ADJECTIVE)
As you can see below, it doesn't work all that well. It is unable to switch between adjective and adverb forms (my specific goal), but it does give some interesting results in other cases.
>>> convert('direct', 'a', 'r')
[]
>>> convert('direct', 'a', 'n')
[('directness', 0.6666666666666666), ('line', 0.3333333333333333)]
>>> convert('quick', 'a', 'r')
[]
>>> convert('quickly', 'r', 'a')
[]
>>> convert('hunger', 'n', 'v')
[('hunger', 0.75), ('thirst', 0.25)]
>>> convert('run', 'v', 'a')
[('persistent', 0.16666666666666666), ('executive', 0.16666666666666666), ('operative', 0.16666666666666666), ('prevalent', 0.16666666666666666), ('meltable', 0.16666666666666666), ('operant', 0.16666666666666666)]
>>> convert('tired', 'a', 'r')
[]
>>> convert('tired', 'a', 'v')
[]
>>> convert('tired', 'a', 'n')
[('triteness', 0.25), ('banality', 0.25), ('tiredness', 0.25), ('commonplace', 0.25)]
>>> convert('tired', 'a', 's')
[]
>>> convert('wonder', 'v', 'n')
[('wonder', 0.3333333333333333), ('wonderer', 0.2222222222222222), ('marveller', 0.1111111111111111), ('marvel', 0.1111111111111111), ('wonderment', 0.1111111111111111), ('question', 0.1111111111111111)]
>>> convert('wonder', 'n', 'a')
[('curious', 0.4), ('wondrous', 0.2), ('marvelous', 0.2), ('marvellous', 0.2)]
Hope this saves someone a little trouble.
`disguise` can be either a verb or a noun, depending on context. – Joel Cornett
`nounize('disguise') == ['disguise']` and `verbify('disguise') == ['disguise']` and `adjectivate('disguise') == ['disguised']` – sam boosalis
`nounize` as an ambiguous cast from any POS to some nouns. it doesn't know whether you want "coder" or "code", but it gives a complete superset, and later NLP can disambiguate. – sam boosalis