Coverage for src/wiktextract/topics.py: 100%

2 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-25 10:11 +0000

1# List of valid topics and canonicalization & generalization mappings 

2# for topics 

3# 

4# Copyright (c) 2020-2022 Tatu Ylonen. See file LICENSE and https://ylonen.org 

5 

# Set of valid topic tags.  (Other tags may be canonicalized to these.)
# Written as a set literal so no intermediate list is built at import time.
valid_topics = {
    "Buddhism",
    "Catholicism",
    "Chinese-cuisine",
    "Christianity",
    "Internet",
    "Islam",
    "LGBT",
    "acrobatics",
    "aeronautics",
    "aerospace",
    "agriculture",
    "alternative-medicine",
    "anatomy",
    "anthropology",
    "arachnology",
    "archeology",
    "architecture",
    "arithmetic",
    "art-history",
    "arts",
    "astrology",
    "astronomy",
    "astrophysics",
    "automotive",
    "ball-games",
    "beverages",
    "biochemistry",
    "biology",
    "board-games",
    "botany",
    "broadcasting",
    "business",
    "card-games",
    "carpentry",
    "cartography",
    "cause",
    "chemistry",
    "cities",
    "color",
    "combinatorics",
    "comics",
    "commerce",
    "communications",
    "computing",
    "construction",
    "cooking",
    "copyright",
    "cosmology",
    "countries",
    "court",
    "crafts",
    "criminology",
    "cryptocurrency",
    "cryptography",
    "cuisine",
    "demography",
    "dancing",
    "dentistry",
    "diving",
    "dogs",
    "dramaturgy",
    "drugs",
    "ecology",
    "economics",
    "education",
    "electrical-engineering",
    "electricity",
    "electromagnetism",
    "energy",
    "engineering",
    "entertainment",
    "entomology",
    "epistemology",
    "ethnography",
    "falconry",
    "fantasy",
    "fashion",
    "film",
    "finance",
    "food",
    "football",
    "fortifications",
    "freemasonry",
    "games",
    "gemology",
    "geography",
    "geology",
    "geometry",
    "government",
    "hairdressing",
    "heading",
    "healthcare",
    "histology",
    "history",
    "hobbies",
    "horology",
    "horses",
    "horseracing",
    "human-sciences",
    "hunting",
    "hydrology",
    "ichthyology",
    "ideology",
    "information",
    "information-science",
    "intellectual-property",
    "jewelry",
    "journalism",
    "law",
    "law-enforcement",
    "lifestyle",
    "linguistics",
    "literature",
    "location",
    "mammals",
    "management",
    "manga",
    "manner",
    "manufacturing",
    "marketing",
    "martial-arts",
    "masonry",
    "mathematics",
    "meats",
    "mechanical-engineering",
    "media",
    "medicine",
    "metallurgy",
    "meteorology",
    "metrology",
    "microbiology",
    "military",
    "mineralogy",
    "mining",
    "monarchy",
    "morphology",
    "music",
    "mysticism",
    "mythology",
    "natural-sciences",
    "naturism",
    "nautical",
    "navy",
    "neurology",
    "neuroscience",
    "nobility",
    "number-theory",
    "oceanography",
    "oenology",
    "ophthalmology",
    "organization",
    "origin",
    "ornithology",
    "paleontology",
    "pathology",
    "performing-arts",
    "petrology",
    "pets",
    "pharmacology",
    "philosophy",
    "phonology",
    "photography",
    "physical-sciences",
    "physics",
    "physiology",
    "planets",
    "political-science",
    "politics",
    "programming",
    "position",
    "publishing",
    "pulmonology",
    "prefectures",
    "printing",
    "property",
    "pseudoscience",
    "psychiatry",
    "psychology",
    "radio",
    "radiology",
    "railways",
    "real-estate",
    "region",
    "religion",
    "scholarly",
    "science-fiction",
    "sciences",
    "sexuality",
    "skating",
    "skiing",
    "social-science",
    "socialism",
    "sociology",
    "software",
    "source",
    "spiritualism",
    "sports",
    "state",
    "states",
    "statistics",
    "taxonomy",
    "telecommunications",
    "telegraphy",
    "telephony",
    "television",
    "temperature",
    "textiles",
    "theater",
    "theology",
    "time",
    "tools",
    "topology",
    "tourism",
    "toxicology",
    "transport",
    "typography",
    "units-of-measure",
    "urbanism",
    "vehicles",
    "video-games",
    "visual-arts",
    "weaponry",
    "weather",
    "weekdays",
    "wrestling",
    "writing",
    "zoology",
}

236 

# Translation map for topics.
#
# Each key is a topic label; each value is a string holding one or more
# target topic names separated by single spaces (e.g. "games sports").
# A target may itself be a key of this map (e.g. "cards" -> "card-games" ->
# "games"), so the entries form chains from specific labels towards more
# general ones.  Some targets (e.g. "climatology", "racing", "war") only
# appear as intermediate keys here.
#
# XXX revisit this mapping.  Create more fine-tuned hierarchy
# XXX or should probably not try to generalize here
topic_generalize_map = {
    "(sport)": "sports",
    "card games": "games",
    "cards": "card-games",
    "board-games": "games",
    "game of Go": "board-games",
    "Scrabble": "board-games",
    "ball games": "ball-games",
    "ball-games": "games sports",
    "sports": "hobbies",
    "dice": "games",
    "rock paper scissors": "games",
    '"manner of action"': "manner",
    "manner of action": "manner",
    "planets of the Solar system": "planets",
    "planets": "astronomy",
    "continents": "region",
    "countries of Africa": "countries",
    "countries of Europe": "countries",
    "countries of Asia": "countries",
    "countries of South America": "countries",
    "countries of North America": "countries",
    "countries of Central America": "countries",
    "countries of Oceania": "countries",
    "countries": "geography",
    "country": "region",
    "the country": "countries",
    "regions of Armenia": "region",
    "region around the Ruppel river": "region",
    "geographical region": "region",
    "winegrowing region": "region",
    "the historical region": "region",
    "region": "location",
    "states of India": "states",
    "states of Australia": "states",
    "states": "region",
    "city": "cities",
    "cities": "region",
    "prefectures of Japan": "prefectures",
    "prefecture": "region",
    "software": "computing",
    "Windows": "software",
    "Linux": "software",
    "secret": "information",
    "mail": "information",
    "blackjack": "games",
    "backgammon": "games",
    "bridge": "games",
    "darts": "games",
    "scientific": "sciences",
    "scholarly": "sciences",
    "academia": "scholarly",
    "medicine": "sciences",
    "traditional medicine": "medicine",
    "human-sciences": "sciences",
    "anthropology": "human-sciences",
    "geography": "natural-sciences",
    "biology": "natural-sciences",
    "physical-sciences": "natural-sciences",
    "engineering": "physical-sciences",
    "drafting": "engineering",
    "CAD": "engineering computing",
    "mathematics": "sciences",
    "maths": "mathematics",
    "computing": "engineering mathematics",
    "anthropodology": "anthropology",
    "ornithology": "biology",
    "ornitology": "ornithology",
    "Ornithology": "ornithology",
    "birdwatching": "ornithology",
    "entomology": "biology",
    "insects": "entomology",
    "anatomy": "medicine",
    "Anatomy": "anatomy",
    "health": "medicine",
    "emergency medicine": "medicine",
    "bone": "anatomy",
    "body": "anatomy",
    "neuroanatomy": "anatomy neurology",
    "neurotoxicology": "neurology toxicology",
    "neurobiology": "neurology",
    "neurophysiology": "physiology neurology",
    "nephrology": "medicine",
    "hepatology": "medicine",
    "endocrinology": "medicine",
    "gynaecology": "medicine",
    "mammology": "medicine",
    "urology": "medicine",
    "neurology": "medicine neuroscience",
    "neuroscience": "medicine",
    "gerontology": "medicine",
    "andrology": "medicine",
    "phycology": "botany",
    "planktology": "botany",
    "oncology": "medicine",
    "hematology": "medicine",
    "physiology": "medicine",
    "gastroenterology": "medicine",
    "surgery": "medicine",
    "ophthalmology": "medicine",
    "pharmacology": "medicine",
    "pharmaceuticals": "pharmacology",
    "drugs": "pharmacology",
    "cytology": "biology medicine",
    "healthcare": "government",
    "cardiology": "medicine",
    "dentistry": "medicine",
    "odontology": "dentistry",
    "pathology": "medicine",
    "toxicology": "medicine",
    "dermatology": "medicine",
    "epidemiology": "medicine",
    "psychiatry": "medicine psychology",
    "psychoanalysis": "medicine psychology",
    "phrenology": "medicine psychology",
    "psychology": "human-sciences",
    "sociology": "social-science",
    "social science": "social-science",
    "social sciences": "social-science",
    "in transactional analysis": "social-science",
    "social-science": "human-sciences",
    "hydraulics": "engineering",
    "demographics": "demography",
    "immunology": "medicine",
    "immunologic sense": "immunology",
    "anesthesiology": "medicine",
    "xenobiology": "biology",
    "sinology": "human-sciences",
    "psychopathology": "psychiatry",
    "histopathology": "pathology histology",
    "histology": "biology",
    "virology": "microbiology",
    "bacteriology": "microbiology",
    "parapsychology": "pseudoscience",
    "psyschology": "psychology error-misspelling",
    "printing technology": "printing",
    "litography": "lithography",
    "lithography": "printing",
    "iconography": "art-history",
    "art-history": "history",
    "geomorphology": "geology",
    "phytopathology": "botany pathology",
    "bryology": "botany",
    "opthalmology": "medicine",
    "embryology": "medicine",
    "illness": "medicine",
    "parasitology": "medicine",
    "teratology": "medicine",
    "speech therapy": "medicine",
    "speech pathology": "medicine",
    "radiology": "medicine",
    "radiography": "radiology",
    "vaccinology": "medicine",
    "traumatology": "medicine",
    "microbiology": "biology",
    "pulmonology": "medicine",
    "obstetrics": "medicine",
    "pneumology": "pulmonology",
    "strong topology": "topology",
    "sociobiology": "social-science biology",
    "radio technology": "electrical-engineering radio",
    "authorship": "writing film",
    "volcanology": "geology",
    "gemmology": "gemology",
    "gem-cutting": "jewelry",
    "gemology": "geology jewelry",
    "jewelry": "lifestyle",
    "jewellery": "jewelry",
    "conchology": "zoology",
    "comics": "literature",
    "anime": "film",
    "manga": "comics",
    "codicology": "history",
    "zoology": "biology",
    "zootomy": "zoology",
    "botany": "biology",
    "malacology": "biology",
    "taxonomy": "biology",
    "biological category": "taxonomy",
    "geology": "geography",
    "mineralogy": "geology chemistry",
    "mineralology": "mineralogy",
    "biochemistry": "microbiology chemistry",
    "immunochemistry": "biochemistry immunology",
    "petrochemistry": "petrology chemistry",
    "linguistics": "human-sciences",
    "language": "linguistics",
    "grammar": "linguistics",
    "syntax": "linguistics",
    "semantics": "linguistics",
    "epistemology": "philosophy",
    "ontology": "epistemology",
    "etymology": "linguistics",
    "ethnology": "anthropology",
    "ethnography": "anthropology",
    "historical ethnography": "ethnography history",
    "entertainment industry": "entertainment economics",
    "electrochemistry": "chemistry",
    "classical studies": "history",
    "textual criticism": "linguistics",
    "nanotechnology": "engineering",
    "electromagnetism": "physics electrical-engineering",
    "biotechnology": "engineering medicine",
    "systems theory": "mathematics",
    "computer games": "games",
    "graphic design": "arts",
    "criminology": "law human-sciences",
    "penology": "criminology",
    "pragmatics": "linguistics",
    "morphology": "linguistics",
    "phonology": "linguistics",
    "phonetics": "phonology",
    "prosody": "phonology",
    "lexicography": "linguistics",
    "lexicology": "lexicography",
    "narratology": "linguistics",
    "linguistic": "linguistics",
    "translation studies": "linguistics",
    "semiotics": "linguistics",
    "dialectology": "linguistics",
    "ortography": "linguistics",
    "typography": "publishing",
    "letterpress typography": "typography",
    "psycholinguistics": "linguistics psychology",
    "sociolinguistics": "linguistics sociology",
    "beekeeping": "agriculture",
    "officialese": "government",
    "hairdressing": "crafts",
    "wagonmaking": "crafts",
    "smithwork": "crafts",
    "papermaking": "crafts",
    "hairstyle": "hairdressing",
    "textiles": "manufacturing",
    "weaving": "textiles",
    "quilting": "textiles",
    "knitting": "textiles",
    "sewing": "textiles",
    "dressmaking": "textiles",
    "cutting": "textiles",
    "furniture": "lifestyle",
    "freemasonry": "lifestyle",
    "Freemasonry": "freemasonry",
    "caving": "hobbies",
    "country dancing": "dancing",
    "dance": "dancing",
    "dancing": "sports",
    "hip-hop": "dancing",
    "cheerleading": "sports",
    "bowling": "sports",
    "athletics": "sports",
    "performing-arts": "arts sports",
    "acrobatics": "performing-arts",
    "castells": "acrobatics",
    "circus": "performing-arts",
    "juggling": "performing-arts",
    "martial arts": "martial-arts",
    "martial-arts": "sports military",
    "judo": "martial-arts",
    "skydiving": "sports",
    "meterology": "meteorology",
    "meteorology": "climatology",
    "climatology": "natural-sciences",
    "weather": "meteorology",
    "climate": "meteorology",
    "cryptozoology": "zoology",
    "lepidopterology": "zoology",
    "nematology": "zoology",
    "campanology": "history",
    "vexillology": "history",
    "phenomenology": "philosophy",
    "seismology": "geology",
    "astronomy": "natural-sciences",
    "cosmology": "astronomy",
    "astrogeology": "astronomy geology",
    "areology": "astronomy geology",
    "stratigraphy": "geology",
    "orography": "geology",
    "stenography": "writing",
    "graphonomics": "writing",
    "scriptwriting": "writing",
    "orthography": "writing",
    "palynology": "chemistry microbiology",
    "lichenology": "botany",
    "seasons": "weather",
    "information technology": "computing",
    "algebra": "mathematics",
    "calculus": "mathematics",
    "arithmetics": "mathematics",
    "statistics": "mathematics",
    "modelling": "mathematics",
    "geometry": "mathematics",
    "logic": "mathematics philosophy",
    "trigonometry": "mathematics",
    "mathematical analysis": "mathematics",
    "ethics": "philosophy",
    "existentialism": "philosophy",
    "religion": "lifestyle",
    "philosophy": "human-sciences",
    "shipping": "transport economics",
    "railways": "transport",
    "trains": "railways",
    "automotive": "vehicles",
    "automobile": "automotive",
    "vehicles": "transport",
    "tourism": "transport lifestyle",
    "travel": "tourism lifestyle",
    "travel industry": "tourism",
    "parliamentary procedure": "government",
    "espionage": "government military",
    "food": "lifestyle",
    "cuisine": "food",
    "Chinese cuisine": "Chinese-cuisine",
    "Indian Chinese cuisine": "Chinese-cuisine",
    "seafood": "cuisine",
    "culinary": "cuisine",
    "vegetable": "food",
    "beverages": "food",
    "beer": "beverages",
    "brewing": "beverages manufacturing",
    "enology": "oenology",
    "oenology": "beverages",
    "wine": "oenology",
    "sewage treatment": "engineering",
    "cooking": "food",
    "baking": "cooking",
    "Indian cookery": "cooking cuisine",
    "sexuality": "lifestyle",
    "seduction community": "sexuality",
    "BDSM": "sexuality",
    "LGBT": "sexuality",
    "sexual orientations": "sexuality",
    "romantic orientations": "sexuality",
    "prostitution": "sexuality",
    "sexology": "sexuality",
    "biblical": "religion",
    "ecclesiastical": "religion",
    "genetics": "biology medicine",
    "medical terminology": "medicine",
    "homeopathy": "medicine",
    "alternative medicine": "alternative-medicine",
    "alternative-medicine": "medicine",
    "Ayurveda": "alternative-medicine",
    "mycology": "biology",
    "paganism": "religion",
    "Scientology": "religion",
    "Islam": "religion",
    "Sufism": "Islam mysticism",
    "mechanical-engineering": "engineering",
    "mechanics": "mechanical-engineering",
    "mechanical": "mechanical-engineering",
    "robotics": "mechanical-engineering computing",
    "machining": "mechanical-engineering",
    "lubricants": "mechanical-engineering",
    "fasteners": "mechanical-engineering",
    "thermodynamics": "physics",
    "fluid dynamics": "physics",
    "signal processing": "computing mathematics",
    "topology": "mathematics",
    "algebraic topology": "topology",
    "algebraic geometry": "geometry",
    "norm topology": "topology",
    "linear algebra": "mathematics",
    "number-theory": "mathematics",
    "number theory": "number-theory",
    "analytic number theory": "number-theory",
    "insurance": "business",
    "taxation": "economics government",
    "sugar-making": "manufacturing",
    "glassmaking": "manufacturing",
    "food manufacture": "manufacturing",
    "manufacturing": "business",
    "optics": "physics engineering",
    "chemistry": "physical-sciences",
    "ceramics": "chemistry engineering",
    "chess": "board-games",
    "xiangqi": "board-games",
    "shogi": "board-games",
    "checkers": "board-games",
    "mahjong": "board-games",
    "Rubik's Cube": "games",
    "crystallography": "chemistry",
    "fluids": "chemistry physics engineering",
    "science": "sciences",
    "physics": "physical-sciences",
    "electrical-engineering": "engineering",
    "electricity": "electrical-engineering electromagnetism energy",
    "electronics": "electricity",
    "programming": "computing",
    "Lisp": "programming",
    "databases": "computing",
    "visual art": "visual-arts",
    "visual arts": "visual-arts",
    "visual-arts": "arts",
    "graffiti": "visual-arts",
    "crafts": "arts hobbies",
    "papercraft": "crafts",
    "bowmaking": "crafts",
    "lutherie": "crafts",
    "ironworking": "crafts",
    "glassblowing": "crafts",
    "history": "human-sciences",
    "Egyptology": "history",
    "heraldry": "hobbies nobility",
    "philately": "hobbies",
    "hobbies": "lifestyle",
    "numismatics": "hobbies",
    "chronology": "horology",
    "horology": "hobbies",
    "cryptography": "computing",
    "encryption": "cryptography",
    "finance": "business",
    "finances": "finance",
    "financial": "finance",
    "accounting": "finance",
    "economics": "sciences",
    "microeconomics": "economics",
    "politics": "government",
    "geopolitics": "politics",
    "sociopolitics": "politics",
    "ideology": "politics philosophy",
    "feminism": "ideology",
    "communism": "ideology",
    "socialism": "ideology",
    "capitalism": "ideology",
    "feudalism": "politics",
    "fascism": "ideology",
    "white supremacist ideology": "ideology",
    "manosphere": "ideology",
    "pedology": "geography psychology",
    "biogeography": "geography biology",
    "cryptocurrency": "finance",
    "nobility": "monarchy",
    "monarchy": "politics",
    # NOTE(review): "demography" and "demographics" map to each other, so
    # any code that follows these chains must tolerate a cycle here.
    "demography": "demographics",
    "historical demography": "demography",
    "chromatography": "chemistry",
    "anarchism": "ideology",
    "economic liberalism": "ideology",
    "diplomacy": "politics",
    "regionalism": "politics",
    "war": "politics",
    "military": "war government",
    "agri.": "agriculture",
    "agriculture": "business lifestyle",
    "horticulture": "agriculture",
    "fashion": "lifestyle",
    "cosmetics": "lifestyle",
    "design": "arts",
    "money": "finance",
    "oceanography": "geography",
    "geological oceanography": "geology oceanography",
    "angelology": "theology",
    "woodworking": "carpentry crafts",
    "art": "arts",
    "television": "broadcasting",
    "broadcasting": "media",
    "radio": "broadcasting",
    "radio communications": "radio",
    "radio technics": "radio",
    "journalism": "media",
    "writing": "journalism literature communications publishing",
    "editing": "writing publishing",
    "poetry": "writing",
    "film": "television",
    "cinematography": "film",
    "drama": "dramaturgy",
    "dramaturgy": "film theater",
    "printing": "publishing",
    "publishing": "media",
    "science fiction": "literature",
    "space science": "aerospace",
    "astronautics": "aerospace",
    "aerodynamics": "aerospace physics",
    "NASA": "aerospace",
    "ESA": "aerospace",
    "fiction": "literature",
    "pornography": "media sexuality",
    "DVD": "media",
    "sex": "sexuality",
    "bibliography": "information-science",
    "information science": "information-science",
    "information-science": "human-sciences computing",
    "naturism": "lifestyle",
    "veganism": "lifestyle",
    "urbanism": "lifestyle",
    "Kantianism": "philosophy",
    "newspapers": "journalism",
    "telegraphy": "telecommunications",
    "wireless telegraphy": "telegraphy",
    "telegram": "telegraphy",
    "audio": "electrical-engineering",
    "literature": "publishing",
    "folklore": "arts literature history",
    "MMORPG": "Internet video-games",
    "ACG": "video-games",
    "roguelikes": "video-games",
    "Magic: The Gathering": "games",
    "IRC": "Internet",
    "CSS": "Internet",
    "blogging": "Internet",
    "music": "entertainment",
    "baile funk": "music",
    "musical note": "music",
    "guitar": "music",
    "handbells": "music",
    "handball": "ball-games",
    "racquet sports": "ball-games",
    "billiards": "ball-games",
    "musicology": "music human-sciences",
    "MIDI": "music",
    "talking": "communications",
    "militaryu": "military error-misspelling",
    "army": "military",
    "navy": "military",
    "naval": "navy",
    "weaponry": "military tools",
    "weapon": "weaponry",
    "firearms": "weaponry",
    "artillery": "weaponry",
    "ballistics": "weaponry physics",
    "fortifications": "military",
    "fortification": "fortifications",
    "law enforcement": "government",
    "police": "law-enforcement",
    "firefighting": "government",
    "archaeology": "history",
    "epigraphy": "history literature",
    "paleontology": "history biology",
    "palæontology": "paleontology",
    "paleobiology": "paleontology",
    "paleoanthropology": "paleontology anthropology",
    "paleogeography": "paleontology geography",
    "paleography": "epigraphy paleogeography",
    "palentology": "paleontology error-misspelling",
    "papyrology": "history",
    "hagiography": "history religion",
    "palaeography": "paleography",
    "historical geography": "geography history",
    "historiography": "history",
    "calligraphy": "arts writing",
    "crocheting": "crafts",
    "ichthyology": "zoology",
    "fish": "ichthyology",
    "herpetology": "zoology",
    "glaciology": "geography",
    "arachnology": "zoology",
    "mammals": "zoology",
    "mammalogy": "zoology",
    "rodents": "mammals",
    "snakes": "zoology",
    "veterinary pathology": "zoology pathology",
    "veterinary": "zoology pathology",
    "conservation": "biology history",
    "patology": "pathology error-misspelling",
    "acarology": "zoology",
    "mythology": "mysticism",
    "ufology": "mythology",
    "fundamental interactions": "physics",
    "quantum field theory": "physics",
    "colorimetry": "physics",
    "extragalactic medium": "cosmology",
    "extra-cluster medium": "cosmology",
    "uranography": "cartography astronomy",
    "astrocartography": "uranography",
    "mining": "business",
    "quarrying": "mining",
    "forestry": "business",
    "metalworking": "metallurgy crafts",
    "tin-plate manufacture": "manufacturing",
    "metallurgy": "engineering",
    "brick-making": "manufacturing",
    "communication": "communications",
    "telecommunications": "electrical-engineering communications",
    "telephony": "telecommunications communications",
    "mobile telephony": "telephony",
    "telephone": "telephony",
    "bookbinding": "crafts",
    "engraving": "crafts",
    "petrology": "geology",
    "petrography": "petrology",
    "petroleum": "energy",
    "energy": "business physics",
    "shipbuilding": "manufacturing",
    "plumbing": "construction",
    "roofing": "construction",
    "carpentry": "construction",
    "construction": "manufacturing",
    "piledriving": "construction",
    "masonry": "construction",
    "stone": "masonry",
    "tools": "engineering",
    "cranes": "tools",
    "colleges": "education",
    "higher education": "education",
    "clothing": "textiles fashion",
    "dyeing": "textiles",
    "fabrics": "textiles",
    "alchemy": "pseudoscience",
    "photography": "hobbies arts",
    "videography": "photography film",
    "pets": "lifestyle",
    "horses": "pets sports",
    "equestrianism": "horses",
    "equestrian": "horses",
    "dressage": "horses",
    "horse racing": "horseracing",
    "horse-racing": "horseracing",
    "horseracing": "horses racing",
    "equitation": "horses",
    "farriery": "horses",
    "dogs": "pets",
    "sheepdog trials": "dogs",
    "demoscene": "computing lifestyle",
    "golf": "sports",
    "tennis": "sports",
    "hunting": "hobbies",
    "fishing": "hobbies",
    "birdwashing": "hobbies",
    "paintball": "games",
    "fisheries": "ecology",
    "limnology": "ecology geology",
    "informatics": "computing",
    "bioinformatics": "computing biology",
    "marketing": "business",
    "advertising": "marketing",
    "electrotechnology": "electrical-engineering",
    "electromagnetic radiation": "electromagnetism",
    "electronics manufacturing": "manufacturing",
    "electric power": "energy electrical-engineering",
    "electronic communication": "telecommunications",
    "electrical device": "electrical-engineering",
    "cigars": "lifestyle",
    "smoking": "lifestyle",
    "flowery": "lifestyle",
    "gambling": "games",
    "bingo": "games",
    "exercise": "sports",
    "football": "ball-games",
    "netball": "ball-games",
    "softball": "ball-games",
    "American football": "football",
    "acting": "film theater",
    "theater": "entertainment",
    "comedy": "entertainment",
    "entertainment": "lifestyle",
    "dominoes": "games",
    "pocket billiards": "games",
    "pool": "games",
    "graphical user interface": "computing",
    "mysticism": "philosophy",
    "philology": "linguistics history philosophy",
    "enthnology": "human-sciences",
    "creationism": "religion",
    "shamanism": "religion",
    "politology": "political-science",
    # Target is "social-science" (singular), the spelling used by every
    # other entry in this map; "social-sciences" is not defined anywhere.
    "political-science": "social-science",
    "political science": "political-science",
    "cartomancy": "mysticism",
    "tarot": "mysticism",
    "tasseography": "mysticism",
    "occult": "mysticism",
    "theology": "religion",
    "religionists": "religion",
    "spiritualism": "religion",
    "spiritism": "spiritualism",
    "demonology": "religion",
    "Zoroastrianism": "religion",
    "Wicca": "religion",
    "Buddhism": "religion",
    "Buddhist": "Buddhism",
    "Shingon Buddhism": "Buddhism",
    "Tendai or Kegon Buddhism": "Buddhism",
    "Zen and Pure Land Buddhism": "Buddhism",
    "Tham": "Buddhism",  # Buddhist religious script
    "motor racing": "racing",
    "racing": "sports",
    "spinning": "sports",
    "gymnastics": "sports",
    "cricket": "ball-games",
    "volleyball": "ball-games",
    "lacrosse": "ball-games",
    "rugby": "ball-games",
    "bodybuilding": "sports",
    "falconry": "hunting",
    "hawking": "falconry",
    "parachuting": "hobbies",
    "squash": "ball-games",
    "curling": "ball-games",
    "motorcycling": "hobbies",
    "swimming": "sports",
    "diving": "sports",
    "underwater diving": "diving",
    "basketball": "ball-games",
    "baseball": "ball-games",
    "pesäpallo": "ball-games",
    "soccer": "ball-games",
    "snooker": "ball-games",
    "snowboarding": "sports",
    "skateboarding": "sports",
    "weightlifting": "sports",
    "skiing": "sports",
    "alpine skiing": "skiing",
    "aerial freestyle": "skiing",
    "mountaineering": "sports",
    "skating": "sports",
    "ice hockey": "skating",
    "cycling": "sports",
    "rowing": "sports",
    "boxing": "martial-arts",
    "Scouting": "lifestyle",
    "bullfighting": "entertainment",
    "archery": "martial-arts",
    "fencing": "martial-arts",
    "climbing": "sports",
    "surfing": "sports",
    "ballooning": "sports",
    "sailmaking": "crafts nautical",
    "sailing": "nautical",
    "maritime": "nautical",
    "ropemaking": "crafts nautical",
    "nautical": "transport",
    "retail": "commerce",
    "commercial": "commerce",
    "retailing": "commerce",
    "electrical": "electricity",
    "category theory": "mathematics computing",
    "in technical contexts": "engineering physics chemistry computing",
    "technology": "engineering",
    "technical": "engineering",
    "stock exchange": "finance",
    "stock market": "finance",
    "stock ticker symbol": "finance",
    "trading": "finance",
    "surveying": "geography",
    "networking": "computing",
    "computer sciences": "computing",
    "computer software": "computing",
    "software compilation": "computing",
    "computer languages": "computing",
    "computer hardware": "computing",
    "computer graphics": "computing",
    "meats": "food",
    "meat": "meats",
    "web design": "computing",
    "aviation": "aeronautics",
    "aeronautics": "aerospace",
    "aerospace": "engineering business",
    "rocketry": "aerospace",
    "investment": "finance",
    "computing theory": "computing mathematics",
    "information theory": "mathematics computing",
    "probability": "mathematics",
    "probability theory": "mathematics",
    "set theory": "mathematics",
    "sets": "mathematics",
    "order theory": "mathematics",
    "graph theory": "mathematics",
    "group theory": "mathematics",
    "complex analysis": "mathematics",
    "measure theory": "mathematics",
    "combinatorics": "mathematics",
    "cellular automata": "computing mathematics",
    "game theory": "mathematics computing",
    "computational": "computing",
    "computer": "computing",
    "behavioral sciences": "psychology",
    "behavior": "psychology",
    "clinical psychology": "psychology",
    "psycology": "psychology",
    "space sciences": "astronomy",
    "applied sciences": "sciences engineering",
    "civil engineering": "engineering",
    "banking": "business",
    "commerce": "business",
    "real-estate": "business",
    "real estate": "real-estate",
    "cryptocurrencies": "cryptocurrency",
    "cartography": "geography",
    "ecology": "biology",
    "hydrology": "geography",
    "hydrography": "hydrology oceanography",
    "hydrodynamics": "hydrology physics",
    "topography": "geography",
    "polygraphy": "law",
    "planetology": "astronomy",
    "astrology": "mysticism",
    "astrology signs": "astrology",
    "linguistic morphology": "morphology",
    "console": "video-games",
    "video games": "video-games",
    "role-playing games": "games",
    "poker": "card-games",
    "waterpolo": "games",
    "wrestling": "martial-arts",
    "professional wrestling": "wrestling",
    "sumo": "wrestling",
    "legal": "law",
    "copyright": "intellectual-property",
    "intellectual-property": "law",
    "patent law": "intellectual-property",
    "intellectual property": "intellectual-property",
    "court": "law government",
    "rail transport": "railways",
    "traffic": "transport",
    "incoterm": "transport law business",
    "road": "transport",
    "colour": "color",
    "days of the week": "weekdays",
    "weekdays": "time",
    "duration": "time",
    "temporal location": "time",
    "monotheism": "religion",
    "Catholicism": "Christianity",
    "Shinto": "religion",
    "Gnosticism": "religion",
    "Protestantism": "Christianity",
    "occultism": "religion",
    "buddhism": "religion",
    "hinduism": "religion",
    "Roman Catholicism": "Catholicism",
    "carnaval": "lifestyle",
    "organic chemistry": "chemistry",
    "inorganic chemistry": "chemistry",
    "gaming": "games",
    "SI units": "units-of-measure",
    "units of measure": "units-of-measure",
    "Western Christianity": "Christianity",
    "Eastern Christianity": "Christianity",
    "Abrahamic religions": "religion",
}