I am training a NER model using Prodigy and have run into a situation where one of the patterns I built and tested using spaCy's Matcher class doesn't work as an input in a patterns file. Is this a bug?
Pattern.jsonl:
prog_patterns.jsonl (618 Bytes)
spacy:
import spacy
from spacy.matcher import Matcher
# Load the small English pipeline and create a Matcher that validates
# patterns against spaCy's pattern schema when they are added.
nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab, validate=True)

# Exact token texts (seniority prefixes and language names) that may precede
# "programmer". Each term is listed in lower / Capitalized / UPPER form;
# forms that collapse to the same string (e.g. "C", "C++", "C#") are listed
# only once, which does not change what the IN set matches.
#
# NOTE: the strings are written with double quotes on purpose. A patterns
# .jsonl file must contain valid JSON, and JSON does not accept single-quoted
# strings, so a single-quoted Python literal cannot be pasted into it as-is.
qualifier_texts = [
    "senior", "Senior", "SENIOR",
    "sr", "Sr", "SR",
    "sr.", "Sr.", "SR.",
    "junior", "Junior", "JUNIOR",
    "jr", "Jr", "JR",
    "jr.", "Jr.", "JR.",
    "c", "C",
    "c++", "C++",
    "c#", "C#",
    "csharp", "Csharp", "CSHARP",
    "java", "Java", "JAVA",
    "javascript", "Javascript", "JAVASCRIPT",
    "julia", "Julia", "JULIA",
    "r", "R",
    "python", "Python", "PYTHON",
    "php", "Php", "PHP",
    "ruby", "Ruby", "RUBY",
    "sql", "Sql", "SQL",
    "nosql", "Nosql", "NOSQL",
    "hql", "Hql", "HQL",
    "fortran", "Fortran", "FORTRAN",
    "cobalt", "Cobalt", "COBALT",
]

# One or more qualifier tokens followed by a token whose lowercase form is
# "programmer".
pattern = [
    {"ORTH": {"IN": qualifier_texts}, "OP": "+"},
    {"LOWER": "programmer"},
]

matcher.add("programming", None, pattern)
matcher(nlp("Senior Java Programmer"))
Prodigy:
python -m prodigy ner.teach example_db en_core_web_sm job_titles_005.txt --loader txt --label example --patterns prog_patterns.jsonl
Using 1 labels: example
Traceback (most recent call last):
File "C:\Anaconda\envs\ex\lib\site-packages\srsly\_json_api.py", line 131, in _yield_json_lines
yield ujson.loads(line)
ValueError: Expected object or value
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Anaconda\envs\ex\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "C:\Anaconda\envs\ex\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Anaconda\envs\ex\lib\site-packages\prodigy\__main__.py", line 380, in <module>
controller = recipe(*args, use_plac=True)
File "cython_src\prodigy\core.pyx", line 212, in prodigy.core.recipe.recipe_decorator.recipe_proxy
File "C:\Anaconda\envs\ex\lib\site-packages\plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "C:\Anaconda\envs\ex\lib\site-packages\plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "C:\Anaconda\envs\ex\lib\site-packages\prodigy\recipes\ner.py", line 143, in teach
matcher = PatternMatcher(model.nlp).from_disk(patterns)
File "cython_src\prodigy\models\matcher.pyx", line 208, in prodigy.models.matcher.PatternMatcher.from_disk
File "C:\Anaconda\envs\ex\lib\site-packages\srsly\_json_api.py", line 85, in read_jsonl
for line in _yield_json_lines(f, skip=skip):
File "C:\Anaconda\envs\ex\lib\site-packages\srsly\_json_api.py", line 135, in _yield_json_lines
raise ValueError("Invalid JSON on line {}: {}".format(line_no, line))
ValueError: Invalid JSON on line 1: {"label":"example","pattern":[{"TEXT": {"IN": ['senior', 'Senior', 'SENIOR', 'sr', 'Sr', 'SR', 'sr.', 'Sr.', 'SR.', 'junior', 'Junior', 'JUNIOR', 'jr', 'Jr', 'JR', 'jr.', 'Jr.', 'JR.', 'c', 'C', 'C', 'c++', 'C++', 'C++', 'c#', 'C#', 'C#', 'csharp', 'Csharp', 'CSHARP', 'java', 'Java', 'JAVA', 'javascript', 'Javascript', 'JAVASCRIPT', 'julia', 'Julia', 'JULIA', 'r', 'R', 'python', 'Python', 'PYTHON', 'php', 'Php', 'PHP', 'ruby', 'Ruby', 'RUBY', 'sql', 'Sql', 'SQL', 'nosql', 'Nosql', 'NOSQL', 'hql', 'Hql', 'HQL', 'fortran', 'Fortran', 'FORTRAN', 'cobalt', 'Cobalt', 'COBALT']}, "OP": "+"}, {"LOWER": "programmer"}]}