# -*- coding: utf-8 -*-
"""
Jamdict public APIs
"""
# This code is a part of jamdict library: https://github.com/neocl/jamdict
# :copyright: (c) 2016 Le Tuan Anh <tuananh.ke@gmail.com>
# :license: MIT, see LICENSE for more details.
import os
import logging
import threading
import warnings
from pathlib import Path
from collections import defaultdict as dd
from collections import OrderedDict
from typing import List, Sequence
from chirptext.deko import HIRAGANA, KATAKANA
_MEMORY_MODE = False
try:
from puchikarui import MemorySource
_MEMORY_MODE = True
except ImportError:
pass
from puchikarui import ExecutionContext
from . import config
from .jmdict import JMDictXMLParser, JMDEntry
from .krad import KRad
from .jmdict_sqlite import JMDictSQLite
from .kanjidic2 import Kanjidic2XMLParser, Character
from .kanjidic2_sqlite import KanjiDic2SQLite
from .jmnedict_sqlite import JMNEDictSQLite
try:
import jamdict_data
_JAMDICT_DATA_AVAILABLE = True
except Exception:
_JAMDICT_DATA_AVAILABLE = False
########################################################################
def getLogger():
return logging.getLogger(__name__)
########################################################################
[docs]class LookupResult(object):
""" Contain lookup results (words, Kanji characters, or named entities) from Jamdict.
A typical jamdict lookup is like this:
>>> jam = Jamdict()
>>> result = jam.lookup('食べ%る')
The command above returns a :any:`LookupResult` object which contains found words (:any:`entries`),
kanji characters (:any:`chars`), and named entities (:any:`names`).
"""
def __init__(self, entries, chars, names=None):
self.__entries: Sequence[JMDEntry] = entries if entries else []
self.__chars: Sequence[Character] = chars if chars else []
self.__names: Sequence[JMDEntry] = names if names else []
@property
def entries(self) -> Sequence[JMDEntry]:
""" A list of words entries
:returns: a list of :class:`JMDEntry <jamdict.jmdict.JMDEntry>` object
:rtype: List[JMDEntry]
"""
return self.__entries
@entries.setter
def entries(self, values: Sequence[JMDEntry]):
self.__entries = values
@property
def chars(self) -> Sequence[Character]:
""" A list of found kanji characters
:returns: a list of :class:`Character <jamdict.kanjidic2.Character>` object
:rtype: Sequence[Character]
"""
return self.__chars
@chars.setter
def chars(self, values: Sequence[Character]):
self.__chars = values
@property
def names(self) -> Sequence[JMDEntry]:
""" A list of found named entities
:returns: a list of :class:`JMDEntry <jamdict.jmdict.JMDEntry>` object
:rtype: Sequence[JMDEntry]
"""
return self.__names
@names.setter
def names(self, values: Sequence[JMDEntry]):
self.__names = values
[docs] def text(self, compact=True, entry_sep='。', separator=' | ', no_id=False, with_chars=True) -> str:
""" Generate a text string that contains all found words, characters, and named entities.
:param compact: Make the output string more compact (fewer info, fewer whitespaces, etc.)
:param no_id: Do not include jamdict's internal object IDs (for direct query via API)
:param entry_sep: The text to separate entries
:param with_chars: Include characters information
:returns: A formatted string ready for display
"""
output = []
if self.entries:
entry_txts = []
for idx, e in enumerate(self.entries, start=1):
entry_txt = e.text(compact=compact, separator=' ', no_id=no_id)
entry_txts.append("#{}: {}".format(idx, entry_txt))
output.append("[Entries]")
output.append(entry_sep)
output.append(entry_sep.join(entry_txts))
elif not compact:
output.append("No entries")
if self.chars and with_chars:
if compact:
chars_txt = ', '.join(str(c) for c in self.chars)
else:
chars_txt = ', '.join(repr(c) for c in self.chars)
if output:
output.append(separator) # TODO: section separator?
output.append("[Chars]")
output.append(entry_sep)
output.append(chars_txt)
if self.names:
name_txts = []
for idx, n in enumerate(self.names, start=1):
name_txt = n.text(compact=compact, separator=' ', no_id=no_id)
name_txts.append("#{}: {}".format(idx, name_txt))
if output:
output.append(separator)
output.append("[Names]")
output.append(entry_sep)
output.append(entry_sep.join(name_txts))
return "".join(output) if output else "Found nothing"
def __repr__(self):
return self.text(compact=True)
def __str__(self):
return self.text(compact=False)
def to_json(self):
warnings.warn("to_json() is deprecated and will be removed in the next major release. Use to_dict() instead.",
DeprecationWarning, stacklevel=2)
return self.to_dict()
def to_dict(self) -> dict:
return {'entries': [e.to_dict() for e in self.entries],
'chars': [c.to_dict() for c in self.chars],
'names': [n.to_dict() for n in self.names]}
[docs]class IterLookupResult(object):
""" Contain lookup results (words, Kanji characters, or named entities) from Jamdict.
A typical jamdict lookup is like this:
>>> res = jam.lookup_iter("花見")
``res`` is an :class:`IterLookupResult` object which contains iterators
to scan through found words (``entries``), kanji characters (``chars``),
and named entities (:any:`names`) one by one.
>>> for word in res.entries:
... print(word) # do somethign with the word
>>> for c in res.chars:
... print(c)
>>> for name in res.names:
... print(name)
"""
def __init__(self, entries, chars=None, names=None):
self.__entries = entries if entries is not None else []
self.__chars = chars if chars is not None else []
self.__names = names if names is not None else []
@property
def entries(self):
""" Iterator for looping one by one through all found entries, can only be used once """
return self.__entries
@property
def chars(self):
""" Iterator for looping one by one through all found kanji characters, can only be used once """
return self.__chars
@property
def names(self):
""" Iterator for looping one by one through all found named entities, can only be used once """
return self.__names
class JamdictSQLite(KanjiDic2SQLite, JMNEDictSQLite, JMDictSQLite):
def __init__(self, db_file, *args, **kwargs):
super().__init__(db_file, *args, **kwargs)
[docs]class Jamdict(object):
""" Main entry point to access all available dictionaries in jamdict.
>>> from jamdict import Jamdict
>>> jam = Jamdict()
>>> result = jam.lookup('食べ%る')
# print all word entries
>>> for entry in result.entries:
>>> print(entry)
# print all related characters
>>> for c in result.chars:
>>> print(repr(c))
To filter results by ``pos``, for example look for all "かえる" that are nouns, use:
>>> result = jam.lookup("かえる", pos=["noun (common) (futsuumeishi)"])
To search for named-entities by type, use the type string as query.
For example to search for all "surname" use:
>>> result = jam.lookup("surname")
To find out which part-of-speeches or named-entities types are available in the
dictionary, use :func:`Jamdict.all_pos <jamdict.util.Jamdict.all_pos>`
and :func:`Jamdict.all_ne_type <jamdict.util.Jamdict.all_pos>`.
Jamdict >= 0.1a10 support ``memory_mode`` keyword argument for reading
the whole database into memory before querying to boost up search speed.
The database may take about a minute to load. Here is the sample code:
>>> jam = Jamdict(memory_mode=True)
When there is no suitable database available, Jamdict will try to use database
from `jamdict-data <https://pypi.org/project/jamdict-data/>`_ package by default.
If there is a custom database available in configuration file,
Jamdict will prioritise to use it over the ``jamdict-data`` package.
"""
def __init__(self, db_file=None, kd2_file=None,
jmd_xml_file=None, kd2_xml_file=None,
auto_config=True, auto_expand=True, reuse_ctx=True,
jmnedict_file=None, jmnedict_xml_file=None,
memory_mode=False, **kwargs):
# data sources
self.reuse_ctx = reuse_ctx
self._db_sqlite = None
self._kd2_sqlite = None
self._jmne_sqlite = None
self._jmd_xml = None
self._kd2_xml = None
self._jmne_xml = None
self.__krad_map = None
self.__jm_ctx = None # for reusing database context
self.__memory_mode = memory_mode
# file paths configuration
self.auto_expand = auto_expand
self.jmd_xml_file = jmd_xml_file if jmd_xml_file else config.get_file('JMDICT_XML') if auto_config else None
self.kd2_xml_file = kd2_xml_file if kd2_xml_file else config.get_file('KD2_XML') if auto_config else None
self.jmnedict_xml_file = jmnedict_xml_file if jmnedict_xml_file else config.get_file('JMNEDICT_XML') if auto_config else None
if auto_expand:
if self.jmd_xml_file:
self.jmd_xml_file = os.path.expanduser(self.jmd_xml_file)
if self.kd2_xml_file:
self.kd2_xml_file = os.path.expanduser(self.kd2_xml_file)
if self.jmnedict_xml_file:
self.jmnedict_xml_file = os.path.expanduser(self.jmnedict_xml_file)
self.db_file = db_file if db_file else config.get_file('JAMDICT_DB') if auto_config else None
if not self.db_file or (self.db_file != ':memory:' and not os.path.isfile(self.db_file)):
if _JAMDICT_DATA_AVAILABLE:
self.db_file = jamdict_data.JAMDICT_DB_PATH
elif self.jmd_xml_file and os.path.isfile(self.jmd_xml_file):
getLogger().warning("JAMDICT_DB could NOT be found. Searching will be extremely slow. Please run `python3 -m jamdict import` first")
self.kd2_file = kd2_file if kd2_file else self.db_file if auto_config else None
if not self.kd2_file or (self.kd2_file != ':memory:' and not os.path.isfile(self.kd2_file)):
if _JAMDICT_DATA_AVAILABLE:
self.kd2_file = None # jamdict_data.JAMDICT_DB_PATH
elif self.kd2_xml_file and os.path.isfile(self.kd2_xml_file):
getLogger().warning("Kanjidic2 database could NOT be found. Searching will be extremely slow. Please run `python3 -m jamdict import` first")
self.jmnedict_file = jmnedict_file if jmnedict_file else self.db_file if auto_config else None
if not self.jmnedict_file or (self.jmnedict_file != ':memory:' and not os.path.isfile(self.jmnedict_file)):
if _JAMDICT_DATA_AVAILABLE:
self.jmnedict_file = None # jamdict_data.JAMDICT_DB_PATH
elif self.jmnedict_xml_file and os.path.isfile(self.jmnedict_xml_file):
getLogger().warning("JMNE database could NOT be found. Searching will be extremely slow. Please run `python3 -m jamdict import` first")
@property
def ready(self) -> bool:
""" Check if Jamdict database is available """
return os.path.isfile(self.db_file) and self.jmdict is not None
def __del__(self):
if self.__jm_ctx is not None:
try:
# try to close default SQLite context if needed
self.__jm_ctx.close()
except Exception:
pass
def __make_db_ctx(self) -> ExecutionContext:
""" Try to reuse context if allowed """
try:
if not self.reuse_ctx:
return self.jmdict.ctx()
elif self.__jm_ctx is None and self.db_file and (self.db_file == ":memory:" or os.path.isfile(self.db_file)):
self.__jm_ctx = self.jmdict.ctx()
except Exception:
getLogger().warning("JMdict data could not be accessed.")
return self.__jm_ctx
@property
def db_file(self):
return self.__db_file
@db_file.setter
def db_file(self, value):
if self.auto_expand and value and value != ':memory:':
self.__db_file = os.path.abspath(os.path.expanduser(value))
else:
self.__db_file = value
@property
def kd2_file(self):
return self.__kd2_file
@kd2_file.setter
def kd2_file(self, value):
if self.auto_expand and value and value != ':memory:':
self.__kd2_file = os.path.abspath(os.path.expanduser(value))
else:
self.__kd2_file = value
@property
def jmnedict_file(self):
return self.__jmnedict_file
@jmnedict_file.setter
def jmnedict_file(self, value):
if self.auto_expand and value and value != ':memory:':
self.__jmnedict_file = os.path.abspath(os.path.expanduser(value))
else:
self.__jmnedict_file = value
@property
def memory_mode(self):
""" if memory_mode = True, Jamdict DB will be loaded into RAM before querying for better performance """
return self.__memory_mode
@property
def jmdict(self):
if not self._db_sqlite and self.db_file:
with threading.Lock():
# Use 1 DB for all
if self.memory_mode and _MEMORY_MODE:
data_source = MemorySource(self.db_file)
else:
if self.memory_mode and not _MEMORY_MODE:
logging.getLogger(__name__).error("Memory mode could not be enabled because puchikarui version is too old. Fallback to normal file DB mode")
data_source = self.db_file
self._db_sqlite = JamdictSQLite(data_source, auto_expand_path=self.auto_expand)
return self._db_sqlite
@property
def kd2(self):
if self._kd2_sqlite is None:
if self.kd2_file is not None and os.path.isfile(self.kd2_file):
with threading.Lock():
if self.memory_mode and _MEMORY_MODE:
data_source = MemorySource(self.kd2_file)
else:
if self.memory_mode and not _MEMORY_MODE:
logging.getLogger(__name__).error("Memory mode could not be enabled because puchikarui version is too old. Fallback to normal file DB mode")
data_source = self.kd2_file
self._kd2_sqlite = KanjiDic2SQLite(data_source, auto_expand_path=self.auto_expand)
elif not self.kd2_file or self.kd2_file == self.db_file:
self._kd2_sqlite = self.jmdict
return self._kd2_sqlite
@property
def jmnedict(self):
""" JM NE SQLite database access object """
if self._jmne_sqlite is None:
if self.jmnedict_file is not None:
with threading.Lock():
if self.memory_mode and _MEMORY_MODE:
data_source = MemorySource(self.jmnedict_file)
else:
if self.memory_mode and not _MEMORY_MODE:
logging.getLogger(__name__).error("Memory mode could not be enabled because puchikarui version is too old. Fallback to normal file DB mode")
data_source = self.jmnedict_file
self._jmne_sqlite = JMNEDictSQLite(data_source, auto_expand_path=self.auto_expand)
elif not self.jmnedict_file or self.jmnedict_file == self.db_file:
self._jmne_sqlite = self.jmdict
return self._jmne_sqlite
@property
def jmdict_xml(self):
if not self._jmd_xml and self.jmd_xml_file:
with threading.Lock():
getLogger().info("Loading JMDict from XML file at {}".format(self.jmd_xml_file))
self._jmd_xml = JMDictXML.from_file(self.jmd_xml_file)
getLogger().info("Loaded JMdict entries: {}".format(len(self._jmd_xml)))
return self._jmd_xml
@property
def krad(self):
""" Break a kanji down to writing components
>>> jam = Jamdict()
>>> print(jam.krad['雲'])
['一', '雨', '二', '厶']
"""
if not self.__krad_map:
with threading.Lock():
self.__krad_map = KRad()
return self.__krad_map.krad
@property
def radk(self):
""" Find all kanji with a writing component
>>> jam = Jamdict()
>>> print(jam.radk['鼎'])
{'鼏', '鼒', '鼐', '鼎', '鼑'}
"""
if not self.__krad_map:
with threading.Lock():
self.__krad_map = KRad()
return self.__krad_map.radk
@property
def kd2_xml(self):
if not self._kd2_xml and self.kd2_xml_file:
with threading.Lock():
getLogger().info("Loading KanjiDic2 from XML file at {}".format(self.kd2_xml_file))
self._kd2_xml = KanjiDic2XML.from_file(self.kd2_xml_file)
getLogger().info("Loaded KanjiDic2 entries: {}".format(len(self._kd2_xml)))
return self._kd2_xml
@property
def jmne_xml(self):
if not self._jmne_xml and self.jmnedict_xml_file:
with threading.Lock():
getLogger().info("Loading JMnedict from XML file at {}".format(self.jmnedict_xml_file))
self._jmne_xml = JMNEDictXML.from_file(self.jmnedict_xml_file)
getLogger().info("Loaded JMnedict entries: {}".format(len(self._jmne_xml)))
return self._jmne_xml
def has_kd2(self) -> bool:
return self.db_file is not None or self.kd2_file is not None or self.kd2_xml_file is not None
def has_jmne(self, ctx=None) -> bool:
""" Check if current database has jmne support """
if ctx is None:
ctx = self.__make_db_ctx()
m = ctx.meta.select_single('key=?', ('jmnedict.version',)) if ctx is not None else None
return m is not None and len(m.value) > 0
def is_available(self) -> bool:
# this function is for developer only
# don't expose it to the public
# ready should be used instead
return (self.db_file is not None or self.jmd_xml_file is not None or
self.kd2_file is not None or self.kd2_xml_file is not None or
self.jmnedict_file is not None or self.jmnedict_xml_file is not None)
def import_data(self):
""" Import JMDict and KanjiDic2 data from XML to SQLite """
if self.db_file and not os.path.exists(self.db_file):
Path(self.db_file).touch()
ctx = self.__make_db_ctx()
ctx.buckmode()
ctx.auto_commit = False
if self.jmdict and self.jmdict_xml:
getLogger().info("Importing JMDict data")
self.jmdict.insert_entries(self.jmdict_xml, ctx=ctx)
# import KanjiDic2
if self.kd2_xml is not None and os.path.isfile(self.kd2_xml_file):
getLogger().info("Importing KanjiDic2 data")
if self.jmdict is not None and self.kd2_file == self.db_file:
self.jmdict.insert_chars(self.kd2_xml, ctx=ctx)
elif self.kd2 is not None:
getLogger().warning(f"Building Kanjidic2 DB using a different DB context {self.kd2_file} vs {self.db_file}")
with self.kd2.ctx() as kd_ctx:
self.kd2.insert_chars(self.kd2_xml, ctx=kd_ctx)
else:
getLogger().warning(f"Kanjidic2 DB path could not be found")
else:
print(f"kd2_xml: {self.kd2_xml}")
print(f"kd2_xml_file: {self.kd2_xml_file}")
getLogger().warning("KanjiDic2 XML data is not available - skipped!")
# import JMNEdict
if self.jmne_xml is not None and os.path.isfile(self.jmnedict_xml_file):
getLogger().info("Importing JMNEdict data")
if self.jmdict is not None and self.jmnedict_file == self.db_file:
self.jmnedict.insert_name_entities(self.jmne_xml, ctx=ctx)
elif self.jmnedict is not None:
getLogger().warning(f"Building Kanjidic2 DB using a different DB context {self.jmne_file} vs {self.db_file}")
with self.jmnedict.ctx() as ne_ctx:
self.jmnedict.insert_name_entities(self.jmne_xml, ctx=ne_ctx)
else:
getLogger().warning(f"JMNE DB path could not be found")
else:
getLogger().warning("JMNEdict XML data is not available - skipped!")
_buckmode_off = getattr(ctx, "buckmode_off", None)
if _buckmode_off is not None:
_buckmode_off()
ctx.commit()
def get_ne(self, idseq, ctx=None) -> JMDEntry:
""" Get name entity by idseq in JMNEdict """
if self.jmnedict is not None:
if ctx is None:
ctx = self.__make_db_ctx()
return self.jmnedict.get_ne(idseq, ctx=ctx)
elif self.jmnedict_xml_file:
return self.jmne_xml.lookup(idseq)
else:
raise LookupError("There is no JMnedict data source available")
def get_char(self, literal, ctx=None) -> Character:
if self.kd2 is not None:
if ctx is None:
ctx = self.__make_db_ctx()
return self.kd2.get_char(literal, ctx=ctx)
elif self.kd2_xml:
return self.kd2_xml.lookup(literal)
else:
raise LookupError("There is no KanjiDic2 data source available")
def get_entry(self, idseq) -> JMDEntry:
if self.jmdict:
return self.jmdict.get_entry(idseq)
elif self.jmdict_xml:
return self.jmdict_xml.lookup(idseq)[0]
else:
raise LookupError("There is no backend data available")
[docs] def all_pos(self, ctx=None) -> List[str]:
""" Find all available part-of-speeches
:returns: A list of part-of-speeches (a list of strings)
"""
if ctx is None:
ctx = self.__make_db_ctx()
return self.jmdict.all_pos(ctx=ctx)
[docs] def all_ne_type(self, ctx=None) -> List[str]:
""" Find all available named-entity types
:returns: A list of named-entity types (a list of strings)
"""
if ctx is None:
ctx = self.__make_db_ctx()
return self.jmnedict.all_ne_type(ctx=ctx)
[docs] def lookup(self, query, strict_lookup=False, lookup_chars=True, ctx=None,
lookup_ne=True, pos=None, **kwargs) -> LookupResult:
""" Search words, characters, and characters.
Keyword arguments:
:param query: Text to query, may contains wildcard characters. Use `?` for 1 exact character and `%` to match any number of characters.
:param strict_lookup: only look up the Kanji characters in query (i.e. discard characters from variants)
:type strict_lookup: bool
:param: lookup_chars: set lookup_chars to False to disable character lookup
:type lookup_chars: bool
:param pos: Filter words by part-of-speeches
:type pos: list of strings
:param ctx: database access context, can be reused for better performance. Normally users do not have to touch this and database connections will be reused by default.
:param lookup_ne: set lookup_ne to False to disable name-entities lookup
:type lookup_ne: bool
:returns: Return a LookupResult object.
:rtype: :class:`jamdict.util.LookupResult`
>>> # match any word that starts with "食べ" and ends with "る" (anything from between is fine)
>>> jam = Jamdict()
>>> results = jam.lookup('食べ%る')
"""
if not self.is_available():
raise LookupError("There is no backend data available")
elif (not query or query == "%") and not pos:
raise ValueError("Query and POS filter cannot be both empty")
if ctx is None:
ctx = self.__make_db_ctx()
entries = []
chars = []
names = []
if self.jmdict is not None:
entries = self.jmdict.search(query, pos=pos, ctx=ctx)
elif self.jmdict_xml:
entries = self.jmdict_xml.lookup(query)
if lookup_chars and self.has_kd2():
# lookup each character in query and kanji readings of each found entries
chars_to_search = OrderedDict({c: c for c in query})
if not strict_lookup and entries:
# auto add characters from entries
for e in entries:
for k in e.kanji_forms:
for c in k.text:
if c not in HIRAGANA and c not in KATAKANA:
chars_to_search[c] = c
for c in chars_to_search:
result = self.get_char(c, ctx=ctx)
if result is not None:
chars.append(result)
# lookup name-entities
if lookup_ne and self.has_jmne(ctx=ctx):
names = self.jmnedict.search_ne(query, ctx=ctx)
# finish
return LookupResult(entries, chars, names)
[docs] def lookup_iter(self, query, strict_lookup=False,
lookup_chars=True, lookup_ne=True,
ctx=None, pos=None, **kwargs) -> LookupResult:
""" Search for words, characters, and characters iteratively.
An :class:`IterLookupResult` object will be returned instead of the normal ``LookupResult``.
``res.entries``, ``res.chars``, ``res.names`` are iterators instead of lists and each of them
can only be looped through once. Users have to store the results manually.
>>> res = jam.lookup_iter("花見")
>>> for word in res.entries:
... print(word) # do somethign with the word
>>> for c in res.chars:
... print(c)
>>> for name in res.names:
... print(name)
Keyword arguments:
:param query: Text to query, may contains wildcard characters. Use `?` for 1 exact character and `%` to match any number of characters.
:param strict_lookup: only look up the Kanji characters in query (i.e. discard characters from variants)
:type strict_lookup: bool
:param: lookup_chars: set lookup_chars to False to disable character lookup
:type lookup_chars: bool
:param pos: Filter words by part-of-speeches
:type pos: list of strings
:param ctx: database access context, can be reused for better performance. Normally users do not have to touch this and database connections will be reused by default.
:param lookup_ne: set lookup_ne to False to disable name-entities lookup
:type lookup_ne: bool
:returns: Return an IterLookupResult object.
:rtype: :class:`jamdict.util.IterLookupResult`
"""
if not self.is_available():
raise LookupError("There is no backend data available")
elif (not query or query == "%") and not pos:
raise ValueError("Query and POS filter cannot be both empty")
if ctx is None:
ctx = self.__make_db_ctx()
# Lookup entries, chars, and names
entries = None
chars = None
names = None
if self.jmdict is not None:
entries = self.jmdict.search_iter(query, pos=pos, ctx=ctx)
if lookup_chars and self.has_kd2():
chars_to_search = OrderedDict({c: c for c in query if c not in HIRAGANA and c not in KATAKANA})
chars = self.kd2.search_chars_iter(chars_to_search, ctx=ctx)
# lookup name-entities
if lookup_ne and self.has_jmne(ctx=ctx):
names = self.jmnedict.search_ne_iter(query, ctx=ctx)
# finish
return IterLookupResult(entries, chars, names)
class JMDictXML(object):
""" JMDict API for looking up information in XML
"""
def __init__(self, entries):
self.entries = entries
self._seqmap = {} # entryID - entryObj map
self._textmap = dd(set)
# compile map
for entry in self.entries:
self._seqmap[entry.idseq] = entry
for kn in entry.kana_forms:
self._textmap[kn.text].add(entry)
for kj in entry.kanji_forms:
self._textmap[kj.text].add(entry)
def __len__(self):
return len(self.entries)
def __getitem__(self, idx):
return self.entries[idx]
def lookup(self, a_query) -> Sequence[JMDEntry]:
if a_query in self._textmap:
return tuple(self._textmap[a_query])
elif a_query.startswith('id#'):
entry_id = a_query[3:]
if entry_id in self._seqmap:
return (self._seqmap[entry_id],)
# found nothing
return ()
@staticmethod
def from_file(filename):
parser = JMDictXMLParser()
return JMDictXML(parser.parse_file(os.path.abspath(os.path.expanduser(filename))))
class JMNEDictXML(JMDictXML):
pass
class KanjiDic2XML(object):
def __init__(self, kd2):
"""
"""
self.kd2 = kd2
self.char_map = {}
for char in self.kd2:
if char.literal in self.char_map:
getLogger().warning("Duplicate character entry: {}".format(char.literal))
self.char_map[char.literal] = char
def __len__(self):
return len(self.kd2)
def __getitem__(self, idx):
return self.kd2[idx]
def lookup(self, char):
if char in self.char_map:
return self.char_map[char]
else:
return None
@staticmethod
def from_file(filename):
parser = Kanjidic2XMLParser()
return KanjiDic2XML(parser.parse_file(filename))