Spaces:
Runtime error
Runtime error
# These are manual mappings that are not caught by our stemming rules or that
# would be handled incorrectly by our automatic stemming rules. In detail,
# the keys of the _MANUAL_MATCHES dict contain the original words and the values
# contain the transformation of each word expected by the OKVQA stemming rule.
# These manual rules were found by checking the `raw_answers` and the `answers`
# fields of the released OKVQA dataset and checking all things that were not
# properly mapped by our automatic rules. In particular, some of the mappings
# are identity mappings, e.g. christmas -> christmas, which would otherwise be
# incorrectly singularized by our inflection.singularize.
| import re | |
| import nltk | |
| from nltk.corpus.reader import VERB | |
| import inflection | |
# Hand-maintained overrides for the stemmer. Each key is a word exactly as it
# appears in the tokenized input and its value is the stem to emit for it.
# A word listed here is looked up before any automatic rule is tried, so an
# entry that maps a word to itself (e.g. "christmas") exists only to keep the
# automatic rules from altering that word.
_MANUAL_MATCHES = {
    "police": "police",
    "las": "las",
    "vegas": "vegas",
    "yes": "yes",
    "jeans": "jean",
    "hell's": "hell",
    "domino's": "domino",
    "morning": "morn",
    "clothes": "cloth",
    "are": "are",
    "riding": "ride",
    "leaves": "leaf",
    "dangerous": "danger",
    "clothing": "cloth",
    "texting": "text",
    "kiting": "kite",
    "firefighters": "firefight",
    "ties": "tie",
    "married": "married",
    "teething": "teeth",
    "gloves": "glove",
    "tennis": "tennis",
    "dining": "dine",
    "directions": "direct",
    "waves": "wave",
    "christmas": "christmas",
    "drives": "drive",
    "pudding": "pud",
    "coding": "code",
    "plating": "plate",
    "quantas": "quanta",
    "hornes": "horn",
    "graves": "grave",
    "mating": "mate",
    "paned": "pane",
    "alertness": "alert",
    "sunbathing": "sunbath",
    "tenning": "ten",
    "wetness": "wet",
    "urinating": "urine",
    "sickness": "sick",
    "braves": "brave",
    "firefighting": "firefight",
    "lenses": "lens",
    "reflections": "reflect",
    "backpackers": "backpack",
    "eatting": "eat",
    "designers": "design",
    "curiousity": "curious",
    "playfulness": "play",
    "blindness": "blind",
    "hawke": "hawk",
    "tomatoe": "tomato",
    "rodeoing": "rodeo",
    "brightness": "bright",
    "circuses": "circus",
    "skateboarders": "skateboard",
    "staring": "stare",
    "electronics": "electron",
    "electicity": "elect",
    "mountainous": "mountain",
    "socializing": "social",
    "hamburgers": "hamburg",
    "caves": "cave",
    "transitions": "transit",
    "wading": "wade",
    "creame": "cream",
    "toileting": "toilet",
    "sautee": "saute",
    "buildings": "build",
    "belongings": "belong",
    "stockings": "stock",
    "walle": "wall",
    "cumulis": "cumuli",
    "travelers": "travel",
    "conducter": "conduct",
    "browsing": "brows",
    "pooping": "poop",
    "haircutting": "haircut",
    "toppings": "top",
    "hearding": "heard",
    "sunblocker": "sunblock",
    "bases": "base",
    "markings": "mark",
    "mopeds": "mope",
    "kindergartener": "kindergarten",
    "pies": "pie",
    "scrapbooking": "scrapbook",
    "couponing": "coupon",
    "meetings": "meet",
    "elevators": "elev",
    "lowes": "low",
    "men's": "men",
    "childrens": "children",
    "shelves": "shelve",
    "paintings": "paint",
    "raines": "rain",
    "paring": "pare",
    "expressions": "express",
    "routes": "rout",
    "pease": "peas",
    "vastness": "vast",
    "awning": "awn",
    "boy's": "boy",
    "drunkenness": "drunken",
    "teasing": "teas",
    "conferences": "confer",
    "ripeness": "ripe",
    "suspenders": "suspend",
    "earnings": "earn",
    "reporters": "report",
    "kid's": "kid",
    "containers": "contain",
    "corgie": "corgi",
    "porche": "porch",
    "microwaves": "microwave",
    "batter's": "batter",
    "sadness": "sad",
    "apartments": "apart",
    "oxygenize": "oxygen",
    "striping": "stripe",
    "purring": "pure",
    "professionals": "profession",
    "piping": "pipe",
    "farmer's": "farmer",
    "potatoe": "potato",
    "emirates": "emir",
    "womens": "women",
    "veteran's": "veteran",
    "wilderness": "wilder",
    "propellers": "propel",
    "alpes": "alp",
    "charioteering": "chariot",
    "swining": "swine",
    "illness": "ill",
    "crepte": "crept",
    "adhesives": "adhesive",
    "regent's": "regent",
    "decorations": "decor",
    "rabbies": "rabbi",
    "overseas": "oversea",
    "travellers": "travel",
    "casings": "case",
    "smugness": "smug",
    "doves": "dove",
    "nationals": "nation",
    "mustange": "mustang",
    "ringe": "ring",
    "gondoliere": "gondolier",
    "vacationing": "vacate",
    "reminders": "remind",
    "baldness": "bald",
    "settings": "set",
    "glaced": "glace",
    "coniferous": "conifer",
    "revelations": "revel",
    "personals": "person",
    "daughter's": "daughter",
    "badness": "bad",
    "projections": "project",
    "polarizing": "polar",
    "vandalizers": "vandal",
    "minerals": "miner",
    "protesters": "protest",
    "controllers": "control",
    "weddings": "wed",
    "sometimes": "sometime",
    "earing": "ear",
}
class OKVQAStemmer:
    """Word-by-word stemmer meant to match the OKVQA v1.1 procedure."""

    def __init__(self):
        self._wordnet_lemmatizer = nltk.stem.WordNetLemmatizer()

    def _stem_token(self, token, tag):
        """Return the stem for one token given its part-of-speech tag.

        The rules are tried in order and the first one that applies wins;
        a token that matches none of them is returned unchanged.
        """
        # Hand-curated overrides take precedence over every automatic rule.
        if token in _MANUAL_MATCHES:
            return _MANUAL_MATCHES[token]
        # Any token ending in "ing" is lemmatized as a verb, whatever its tag.
        if token.endswith("ing"):
            return self._wordnet_lemmatizer.lemmatize(token, VERB)
        # Tokens whose tag starts with NNS or NNPS are singularized.
        if tag.startswith(("NNS", "NNPS")):
            return inflection.singularize(token)
        return token

    def stem(self, input_string):
        """Tokenize `input_string`, stem each token, and re-join with spaces.

        Args:
            input_string: The text to stem.

        Returns:
            The stemmed tokens joined by single spaces.
        """
        tokens = nltk.tokenize.word_tokenize(input_string)
        tagged_tokens = nltk.pos_tag(tokens)
        return " ".join(
            self._stem_token(token, tag) for token, tag in tagged_tokens
        )
# Single stemmer instance created at import time and reused by every call.
stemmer = OKVQAStemmer()


def postprocess_ok_vqa_generation(prediction) -> str:
    """Return `prediction` after passing it through the shared OKVQA stemmer.

    Args:
        prediction: The generated answer text to normalize.

    Returns:
        The stemmed form of `prediction`, as produced by `stemmer.stem`.
    """
    return stemmer.stem(prediction)