Source code for corehq.motech.openmrs.finders

"""
PatientFinders are used to find OpenMRS patients that correspond to
CommCare cases if none of the patient identifiers listed in
OpenmrsCaseConfig.match_on_ids have successfully matched a patient.

See `README.md`__ for more context.
"""
import logging
from collections import namedtuple
from functools import partial
from pprint import pformat

from dimagi.ext.couchdbkit import (
    DecimalProperty,
    DictProperty,
    DocumentSchema,
    ListProperty,
)

from corehq.motech.finders import (
    MATCH_FUNCTIONS,
    PropertyWeight,
)
from corehq.motech.openmrs.const import OPENMRS_DATA_TYPE_BOOLEAN
from corehq.motech.value_source import (
    deserialize,
    recurse_subclasses,
)

logger = logging.getLogger(__name__)


constant_false = {
    "value": 'False',
    # We are fetching from a case property or a form question value, and
    # we want `get_value()` to return False (bool). `get_value()`
    # serialises case properties and form question values as external
    # data types. OPENMRS_DATA_TYPE_BOOLEAN is useful because it is a
    # bool, not a string, so `constant_false.get_value()` will return
    # False (not 'False')
    "external_data_type": OPENMRS_DATA_TYPE_BOOLEAN,
}


[docs]class PatientFinder(DocumentSchema): """ The ``PatientFinder`` base class was developed as a way to handle situations where patient cases are created in CommCare instead of being imported from OpenMRS. When patients are imported from OpenMRS, they will come with at least one identifier that MOTECH can use to match the case in CommCare with the corresponding patient in OpenMRS. But if the case is registered in CommCare then we may not have an ID, or the ID could be wrong. We need to search for a corresponding OpenMRS patient. Different projects may focus on different kinds of case properties, so it was felt that a base class would allow some flexibility. The ``PatientFinder.wrap()`` method allows you to wrap documents of subclasses. The ``PatientFinder.find_patients()`` method must be implemented by subclasses. It returns a list of zero, one, or many patients. If it returns one patient, the OpenmrsRepeater.find_or_create_patient() will accept that patient as a true match. .. NOTE:: The consequences of a false positive (a Type II error) are severe: A real patient will have their valid values overwritten by those of someone else. So ``PatientFinder`` subclasses should be written and configured to skew towards false negatives (Type I errors). In other words, it is much better not to choose a patient than to choose the wrong patient. """ # Whether to create a new patient if no patients are found create_missing = DictProperty(default=constant_false) @classmethod def wrap(cls, data): if 'create_missing' in data and isinstance(data['create_missing'], bool): data['create_missing'] = { 'external_data_type': OPENMRS_DATA_TYPE_BOOLEAN, 'value': str(data['create_missing']) } if cls is PatientFinder: subclass = { sub._doc_type: sub for sub in recurse_subclasses(cls) }.get(data['doc_type']) return subclass.wrap(data) if subclass else None else: return super(PatientFinder, cls).wrap(data) def find_patients(self, requests, case, case_config): """ Given a case, search OpenMRS for possible matches. Return the best results. Subclasses must define "best". If just one result is returned, it will be chosen. """ raise NotImplementedError
PatientScore = namedtuple('PatientScore', ['patient', 'score'])
[docs]class WeightedPropertyPatientFinder(PatientFinder): """ The ``WeightedPropertyPatientFinder`` class finds OpenMRS patients that match CommCare cases by assigning weights to case properties, and adding the weights of matching patient properties to calculate a confidence score. """ # Identifiers that are searchable in OpenMRS. e.g. # [ 'bahmni_id', 'household_id', 'last_name'] searchable_properties = ListProperty() # The weight assigned to a matching property. # [ # {"case_property": "bahmni_id", "weight": 0.9}, # {"case_property": "household_id", "weight": 0.9}, # { # "case_property": "dob", # "weight": 0.75, # "match_type": "days_diff", # // days_diff matches based on days difference from given date # "match_params": [364] # }, # { # "case_property": "first_name", # "weight": 0.025, # "match_type": "levenshtein", # // levenshtein function takes edit_distance / len # "match_params": [0.2] # // i.e. 20% is one edit for every 5 characters # // e.g. "Riyaz" matches "Riaz" but not "Riazz" # }, # {"case_property": "last_name", "weight": 0.025}, # {"case_property": "municipality", "weight": 0.2}, # ] property_weights = ListProperty(PropertyWeight) # The threshold that the sum of weights must pass for a CommCare case to # be considered a match to an OpenMRS patient threshold = DecimalProperty(default=1.0) # If more than one patient passes `threshold`, the margin by which the # weight of the best match must exceed the weight of the second-best match # to be considered correct. confidence_margin = DecimalProperty(default=0.667) # Default: Matches two thirds better than second-best def __init__(self, *args, **kwargs): super(WeightedPropertyPatientFinder, self).__init__(*args, **kwargs) self._property_map = {} def get_score(self, patient, case): """ Return the sum of weighted properties to give an OpenMRS patient a score of how well they match a CommCare case. """ def weights(): for property_weight in self.property_weights: prop = property_weight['case_property'] jsonpath, value_source_dict = self._property_map[prop] weight = property_weight['weight'] matches = jsonpath.find(patient) for match in matches: patient_value = match.value case_value = case.get_case_property(prop) match_type = property_weight['match_type'] match_params = property_weight['match_params'] match_function = partial(MATCH_FUNCTIONS[match_type], *match_params) is_equivalent = match_function(deserialize(value_source_dict, patient_value), case_value) yield weight if is_equivalent else 0 return sum(weights()) def find_patients(self, requests, case, case_config): """ Matches cases to patients. Returns a list of patients, each with a confidence score >= self.threshold """ from corehq.motech.openmrs.openmrs_config import get_property_map from corehq.motech.openmrs.repeater_helpers import search_patients self._property_map = get_property_map(case_config) candidates = {} # key on OpenMRS UUID to filter duplicates for prop in self.searchable_properties: value = case.get_case_property(prop) if value: response_json = search_patients(requests, value) for patient in response_json['results']: score = self.get_score(patient, case) if score >= self.threshold: candidates[patient['uuid']] = PatientScore(patient, score) if not candidates: logger.info( 'Unable to match case "%s" (%s): No candidate patients found.', case.name, case.get_id, ) return [] if len(candidates) == 1: patient = list(candidates.values())[0].patient logger.info( 'Matched case "%s" (%s) to ONLY patient candidate: \n%s', case.name, case.get_id, pformat(patient, indent=2), ) return [patient] patients_scores = sorted(candidates.values(), key=lambda candidate: candidate.score, reverse=True) if patients_scores[0].score / patients_scores[1].score > 1 + self.confidence_margin: # There is more than a `confidence_margin` difference # (defaults to 66.7%) in score between the best-ranked # patient and the second-best-ranked patient. Let's go with # Patient One. patient = patients_scores[0].patient logger.info( 'Matched case "%s" (%s) to BEST patient candidate: \n%s', case.name, case.get_id, pformat(patients_scores, indent=2), ) return [patient] # We can't be sure. Just send them all. logger.info( 'Unable to match case "%s" (%s) to patient candidates: \n%s', case.name, case.get_id, pformat(patients_scores, indent=2), ) return [ps.patient for ps in patients_scores]