Hi @magdaaniol
Thank you so much for the advice.
It's actually giving me the score now.
Here is the meta.json generated for the model:
{
"lang":"en",
"name":"pipeline",
"version":"0.0.0",
"spacy_version":">=3.8.4,<3.9.0",
"description":"",
"author":"",
"email":"",
"url":"",
"license":"",
"spacy_git_version":"85cc763",
"vectors":{
"width":0,
"vectors":0,
"keys":0,
"name":null,
"mode":"default"
},
"labels":{
"textcat_multilabel":[
"Jet Fuel",
"LNG",
"Propane",
"Marine fuel 0.5%",
"Natural gas",
"Bunker fuel",
"Gasoline",
"Dubai crude",
"Jet fuel",
"Paraxylene",
"Container freight",
"Soybean oil",
"Crude palm oil",
"Rice",
"Marine gasoil",
"Gasoil",
"Naphtha",
"Wheat",
"Metallurgical coal",
"Reformulated blendstock for oxygenate blending",
"Ultra low sulfur diesel fuel",
"Refined Products",
"Corn",
"Carbon credits",
"Ethanol",
"Molybdenum oxide",
"Kerosene",
"NGL",
"Lithium hydroxide",
"Crude Oil",
"IFO 380 CST",
"RON 92",
"Raffia grade polypropylene",
"Wind energy",
"Solar energy",
"Renewable energy certificate (REC)",
"Basrah Heavy crude",
"WTI Midland crude",
"Soybeans",
"Methanol",
"Styrene monomer",
"Nitrates",
"Hot-rolled steel coil",
"IFO 180 CST",
"Crude oil",
"Raffinate-1",
"Ammonia",
"Dirty freight",
"Dried distillers grains",
"Clean freight",
"Soybean meal",
"High sulfur fuel oil",
"Biodiesel",
"FAME 0 (RED) grade",
"Benzene",
"Molybdenum",
"Ferromolybdenum",
"Rhodium",
"Cold-rolled steel coil",
"Iron ore",
"Thermal coal",
"Intermediates",
"2-Ethylhexyl acrylate",
"Bottle grade PET",
"Urea",
"Block copolymer polypropylene",
"Mixed xylene",
"Brent crude",
"Fuel oil",
"MTBE",
"Ethylene",
"Ferrous scrap",
"Lithium carbonate",
"Coal",
"LPG",
"Acetic acid",
"Vinyl acetate monomer",
"ETBE",
"WTI Cushing crude",
"Toluene",
"Refined products",
"Electricity",
"Low sulfur fuel oil",
"Slurry oil",
"Sulfur",
"Pulverized coal injection (PCI) coal",
"Low density polyethylene",
"Shipping",
"Cobalt",
"Dry bulk freight",
"Linear low density polyethylene",
"Alumina",
"Heavy naphtha",
"EU allowances (EUA)",
"Biaxially oriented polypropylene",
"Forties crude",
"Used cooking oil methyl ester",
"Uranium",
"Butadiene",
"Styrene butadiene rubber",
"Asphalt",
"Rebar",
"Recycle grade PET",
"Steel plate",
"C199\nCrude palm oil",
"RBDP stearin",
"Rapeseed oil",
"Polypropylene",
"Aluminum",
"Oseberg crude",
"Methyl methacrylate",
"Purified terephthalic acid",
"Palm oil mill effluent (POME) oil",
"12.5% protein wheat",
"White spirit",
"Basrah Medium crude",
"Lithium spodumene",
"Beef",
"Wet freight"
]
},
"pipeline":[
"textcat_multilabel"
],
"components":[
"textcat_multilabel"
],
"disabled":[
],
"performance":{
"cats_score":0.4725205941,
"cats_score_desc":"macro AUC",
"cats_micro_p":0.9512195122,
"cats_micro_r":0.7005988024,
"cats_micro_f":0.8068965517,
"cats_macro_p":0.3143621085,
"cats_macro_r":0.2921695951,
"cats_macro_f":0.2993998017,
"cats_macro_auc":0.4725205941,
"cats_f_per_type":{
"Jet Fuel":{
"p":1.0,
"r":1.0,
"f":1.0
},
"LNG":{
"p":0.9090909091,
"r":1.0,
"f":0.9523809524
},
"Propane":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Marine fuel 0.5%":{
"p":1.0,
"r":0.8181818182,
"f":0.9
},
"Natural gas":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Bunker fuel":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Gasoline":{
"p":1.0,
"r":0.9333333333,
"f":0.9655172414
},
"Dubai crude":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Jet fuel":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Paraxylene":{
"p":1.0,
"r":0.75,
"f":0.8571428571
},
"Container freight":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Soybean oil":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Crude palm oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Rice":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Marine gasoil":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Gasoil":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Naphtha":{
"p":1.0,
"r":0.5,
"f":0.6666666667
},
"Wheat":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Metallurgical coal":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Reformulated blendstock for oxygenate blending":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Ultra low sulfur diesel fuel":{
"p":1.0,
"r":0.6666666667,
"f":0.8
},
"Refined Products":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Corn":{
"p":1.0,
"r":0.6666666667,
"f":0.8
},
"Carbon credits":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Ethanol":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Molybdenum oxide":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Kerosene":{
"p":1.0,
"r":1.0,
"f":1.0
},
"NGL":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Lithium hydroxide":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Crude Oil":{
"p":1.0,
"r":0.5,
"f":0.6666666667
},
"IFO 380 CST":{
"p":1.0,
"r":0.5,
"f":0.6666666667
},
"RON 92":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Raffia grade polypropylene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Wind energy":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Solar energy":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Renewable energy certificate (REC)":{
"p":1.0,
"r":0.8,
"f":0.8888888889
},
"Basrah Heavy crude":{
"p":1.0,
"r":1.0,
"f":1.0
},
"WTI Midland crude":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Soybeans":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Methanol":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Styrene monomer":{
"p":0.5,
"r":1.0,
"f":0.6666666667
},
"Nitrates":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Hot-rolled steel coil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"IFO 180 CST":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Crude oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Raffinate-1":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Ammonia":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Dirty freight":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Dried distillers grains":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Clean freight":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Soybean meal":{
"p":1.0,
"r":1.0,
"f":1.0
},
"High sulfur fuel oil":{
"p":1.0,
"r":0.8333333333,
"f":0.9090909091
},
"Biodiesel":{
"p":0.0,
"r":0.0,
"f":0.0
},
"FAME 0 (RED) grade":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Benzene":{
"p":1.0,
"r":0.8,
"f":0.8888888889
},
"Molybdenum":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Ferromolybdenum":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Rhodium":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Cold-rolled steel coil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Iron ore":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Thermal coal":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Intermediates":{
"p":0.0,
"r":0.0,
"f":0.0
},
"2-Ethylhexyl acrylate":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Bottle grade PET":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Urea":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Block copolymer polypropylene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Mixed xylene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Brent crude":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Fuel oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"MTBE":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Ethylene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Ferrous scrap":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Lithium carbonate":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Coal":{
"p":0.0,
"r":0.0,
"f":0.0
},
"LPG":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Acetic acid":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Vinyl acetate monomer":{
"p":0.0,
"r":0.0,
"f":0.0
},
"ETBE":{
"p":0.0,
"r":0.0,
"f":0.0
},
"WTI Cushing crude":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Toluene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Refined products":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Electricity":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Low sulfur fuel oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Slurry oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Sulfur":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Pulverized coal injection (PCI) coal":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Low density polyethylene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Shipping":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Cobalt":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Dry bulk freight":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Linear low density polyethylene":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Alumina":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Heavy naphtha":{
"p":0.0,
"r":0.0,
"f":0.0
},
"EU allowances (EUA)":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Biaxially oriented polypropylene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Forties crude":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Used cooking oil methyl ester":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Uranium":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Butadiene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Styrene butadiene rubber":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Asphalt":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Rebar":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Recycle grade PET":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Steel plate":{
"p":0.0,
"r":0.0,
"f":0.0
},
"C199\nCrude palm oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"RBDP stearin":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Rapeseed oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Polypropylene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Aluminum":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Oseberg crude":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Methyl methacrylate":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Purified terephthalic acid":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Palm oil mill effluent (POME) oil":{
"p":0.0,
"r":0.0,
"f":0.0
},
"12.5% protein wheat":{
"p":0.0,
"r":0.0,
"f":0.0
},
"White spirit":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Basrah Medium crude":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Lithium spodumene":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Beef":{
"p":0.0,
"r":0.0,
"f":0.0
},
"Wet freight":{
"p":0.0,
"r":0.0,
"f":0.0
}
},
"textcat_multilabel_loss":0.0206836168
}
}
- How can I improve the model accuracy here? - By providing more training data, or is there another solution?
- How can I identify which categories have low accuracy? - Is it by checking the performance field in meta.json?
Can you check the config.cfg file below and suggest whether there is anything I need to change to improve the accuracy?
config.cfg:
[paths]
train = null
dev = null
vectors = null
init_tok2vec = null
[system]
gpu_allocator = null
seed = 0
[nlp]
lang = "en"
pipeline = ["textcat_multilabel"]
batch_size = 1000
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
vectors = {"@vectors":"spacy.Vectors.v1"}
[components]
[components.textcat_multilabel]
factory = "textcat_multilabel"
scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v2"}
threshold = 0.5
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length = 262144
ngram_size = 3
no_output_layer = false
nO = null
[corpora]
[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[training]
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
accumulate_gradient = 1
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 200
frozen_components = []
annotating_components = []
before_to_disk = null
before_update = null
[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
get_length = null
[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
t = 0.0
[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = true
[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
learn_rate = 0.001
[training.score_weights]
cats_score = 1.0
cats_score_desc = null
cats_micro_p = null
cats_micro_r = null
cats_micro_f = null
cats_macro_p = null
cats_macro_r = null
cats_macro_f = null
cats_macro_auc = null
cats_f_per_type = null
[pretraining]
[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
[initialize.components]
[initialize.tokenizer]