1 """Parameters for hierarchical (including graphical) models
2
3 @var entry_width: Width in characters for windows allowing data entry.
4 @type entry_width: Int
5 @var epsilon: Tolerance when checking that probability distributions sum to 1.
6 @type epsilon: Float
7 @var precision: Precision for printing floating point numbers
8 @type precision: int
9 @var sep: String used to separate columns in textual representation of factors
10 @type sep: Int
11 @var _version: Version of this module
12 @type _version: String
13 """
14
15 _version = '$Id: Parameters.py,v 1.19 2008/10/07 09:12:30 jc Exp $'
18 """Base class for errors in the Parameters module"""
19 pass
20
23
26
27 import Tkinter
28 from Variables import SubDomain, extdiv, Domain
29 from copy import copy
30 import operator
31 try:
32 import gPyC
33 except ImportError:
34 print "No C functions"
35 import os
36
37 entry_width = 7
38 epsilon = 0.001
39 precision = 2
40 sep = ' | '
41
42
43
44
45
46
47
48
49
50
51
52 -class Factor(SubDomain):
53 """Factors represent functions from a discrete product space to the reals
54
55 Typically used to construct discrete probability distributions with
56 certain conditional independence properties
57 @ivar _data: A value for each joint instantiation of the factor's L{_variables}
58 @type _data: List
59 """
60
61 - def __init__(self,variables=(),data=None,domain=None,new_domain_variables=None,
62 must_be_new=False,check=False,convert=False):
63 """Initialise a L{Factor} object
64
65 @param variables: Variables in the factor. C{variables} can be any iterable object,
66 e.g. a list, tuple, set or frozenset. If C{variables} is a sequence (e.g. a list or tuple),
67 then the order of the variables in this sequence is ignored.
68 @type variables: Iterable
69 @param data: The numbers associated with the joint instantiations of the
70 variables. If None, then a list of 1.0s of the right size is created. The I{i}th value of C{data}
71 corresponds to the I{i}th joint instanstiation of the variables. To understand how instantiations
72 are ordered it may be enough to just print a factor and observe the ordering, but here is the formal definition.
73
74 Joint instantiations are ordered as follows: Each joint instantiation
75 is a tuple of values: I{val0,val1, ... valn} where I{valj} is the value of the I{j}th variable
76 in this joint instantiation and where variables are ordered according to Python's
77 standard ordering (i.e. lexicographically). The instantiations are then ordered just like any
78 other collection of Python tuples: let C{tuple1} and C{tuple2} be two different tuples, and let C{k}
79 be the index of the first element where they differ, then
80 C{tuple1 < tuple2} iff C{tuple1[k] < tuple2[k]}.
81 @type data: List, unless C{convert=True} or None
82 @param new_domain_variables: If not C{None}, a dictionary containing a mapping from any new
83 variables to their values.
84 @type new_domain_variables: Dict or None
85 @param domain: A domain for the model.
86 If None the internal default domain is used.
87 @type domain: L{Variables.Domain} or None
88 @param must_be_new: Whether domain variables in C{new_domain_variables} have
89 to be new
90 @type must_be_new: Boolean
91 @param check: Whether to check that
92 (1) C{variables} is of the right form, and (2) that each variable
93 has an associated set of values and (3) that C{data} is the right size and type.
94 @type check: Boolean
95 @param convert: If C{True}, C{data} is converted to a list.
96 @type convert: Boolean
97 @raise TypeError: If C{check} is set and C{convert} is not set and
98 C{data} is of the wrong type.
99 @raise VariableError: If C{check} is set and there is a variable in
100 C{variables} which does not have
101 associated values. Or If a variable in C{new_domain_variables}
102 already exists with values different from
103 its values in C{new_domain_variables};
104 Or if C{must_be_new} is set and the variable already exists.
105 @raise DataError: If C{check} is set and C{data} is the wrong size.
106 """
107 SubDomain.__init__(self,variables,domain,new_domain_variables,must_be_new,check)
108 if data is None or check:
109 size = 1
110
111 for variable in self._variables:
112 size *= self._numvals[variable]
113 if data is None:
114 data = [1.0] * size
115 if convert:
116 data = list(data)
117 if check:
118 if size != len(data):
119 raise DataError("Expected data of size %d, but got data of size %d" %
120 (size,len(data)))
121 if not isinstance(data,list):
122 raise TypeError("Data for a factor must be a list, try calling with convert = True")
123 self._data = data
124
125
127 """Return value associated with the instantiation C{inst}
128
129 C{inst} is either a list/tuple of values, or a dictionary mapping
130 variables to values, or a data index.
131
132 If a dictionary is used it can contain 'extra' keys which do
133 not correspond to variables of C{self}. These are just ignored.
134
135 Uses an internal dictionary which will be built if not already.
136 @param inst: Instantiation
137 @type inst: Tuple, List, Dictionary or Int
138 @return: The value in C{self} associated with C{inst}
139 @rtype: Float
140 @raise KeyError: If there is no joint instantiation C{inst}.
141 """
142 return self._data[self._decode_inst(inst)]
143
144
146 """Iterates over the factors in a factor
147
148 @return: Iterator over factor
149 @rtype: Iterator
150 @raise ValueError: If C{self} has no variables.
151 """
152 top = min(self._variables)
153 rest = self._variables - set([top])
154 chunk_size = len(self._data) / self._numvals[top]
155 for i in range(0,len(self._data),chunk_size):
156 yield Factor(rest,self._data[i:i+chunk_size],self)
157
159 """Return the number of values in a factor
160
161 @return: The number of values in a factor
162 @rtype: Int
163 """
164 return len(self._data)
165
167 return 'Factor(%s,%s,None,%s)' % (self._variables,
168 self._data,
169 self._domain)
170
172 """Set value associated with the instantiation C{inst}
173
174 C{inst} is either a list/tuple of values, or a dictionary mapping
175 variables to values, or a data index.
176
177 (Uses an internal dictionary which will be built if not already.)
178
179 @param inst: Instantiation
180 @type inst: Tuple, List, Dictionary or Int
181 @return: The value in C{self} associated with C{inst}
182 @rtype: Float
183 """
184 self._data[self._decode_inst(inst)] = val
185
187 """
188 Pretty representation of a factor
189
190 @return: Pretty representation of a factor
191 @rtype: String
192 """
193 fmt, dashes = self._header()
194 out = ('\n'
195 + fmt % tuple(sorted(self._variables)) + '\n'
196 + dashes + ('-' * (precision+2)) + '\n')
197 if isinstance(self._data[0],int):
198 rfmt = '%6d\n'
199 elif isinstance(self._data[0],float):
200 rfmt = '%%6.%df\n' % precision
201 else:
202 rfmt = '%s\n'
203 for i, inst in enumerate(self.insts()):
204 out += fmt % inst
205 out += rfmt % self._data[i]
206 return out
207
209 """Alter C{self} to be a function of C{variables}
210
211 Broadcasting data values as necessary
212 @param variables: New variables for C{self}
213 @type variables: Iterable
214 @return: Altered self
215 @rtype: L{Factor}
216 @raise ValueError: If C{variables} is not a superset of C{self}'s variables
217 """
218 variables = frozenset(variables)
219 if not (self._variables <= variables):
220 raise ValueError("%s not a subset of %s" % (self._variables,variables))
221 self._data = self._data_broadcast(self._data,
222 sorted(variables),
223 len(variables) - len(self._variables))
224 self._variables = variables
225 return self
226
227 - def copy(self,copy_domain=False):
228 """Return a 'copy' of a factor
229
230 @param copy_domain: If true C{self}'s domain is copied, otherwise the copy
231 shares C{self}'s domain
232 @type copy_domain: Boolean
233 @return: The copy
234 @rtype: L{Factor}
235 """
236 cp = SubDomain.copy(self,copy_domain)
237 cp.__class__ = Factor
238 cp._data = self._data[:]
239 return cp
240
242 """Return a 'copy' of a factor, but using different variables
243
244 For each variable pair old -> new, either:
245 1) new is a known variable (see L{Variables.Domain.known_variable}) with
246 the same values as old or
247 2) new is a new variable; in which case it will be given the same values
248 as old
249
250 @param oldnew: A mapping from old to new variables
251 @type oldnew: Dictionary
252 @param copy_domain: If true C{self}'s domain is copied, otherwise the copy
253 shares C{self}'s domain
254 @type copy_domain: Boolean
255 @return: The copy
256 @rtype: L{Factor}
257 @todo: Implement efficiently
258 """
259 cp = self.copy(copy_domain)
260
261 for old, new in oldnew.items():
262 if new in self._domain:
263 if self._domain[old] != self._domain[new]:
264 raise ValueError(
265 "%s and %s have different values: %s and %s, resp."
266 % (new, old, self._domain[old], self._domain[new]))
267 else:
268 cp.add_domain_variable(new,self._domain[old])
269
270
271 oldvars = sorted(self._variables)
272 newvars = [oldnew[old] for old in oldvars]
273 cp._variables = frozenset(newvars)
274
275
276 for inst in self.insts():
277 olddkt = dict(zip(oldvars,inst))
278 newdkt = dict(zip(newvars,inst))
279 cp[newdkt] = self[olddkt]
280 return cp
281
283 """Return a copy of the data in a L{Factor}
284
285 @return: A copy of the data in a L{Factor}
286 @rtype: List
287 """
288 return self._data[:]
289
291 """Alter a factor's B{data} by effecting the restriction on
292 variables given by C{newvalues}
293
294 Note that this does not alter the factor's domain
295 which has to therefore be done separately.
296 @param newvalues: Dictionary of the form {var1:values1,var2:values2..}.
297 Each value of this dictionary must be an iterable.
298 Values in an iterable which were not previously values of the corresponding variable
299 are ignored. Variables which are not used in the factor are also ignored. Upon return
300 each value in C{newvalues.values()} will be a frozenset.
301 @type newvalues: Dictionary
302 @param keep_class: Object will become a L{Factor} if false, otherwise class is unaltered.
303 @type keep_class: Boolean
304 @return: C{self}
305 @rtype: C{Factor}
306 """
307 togo = len(self._variables.intersection(newvalues))
308 self._data = self._data_restrict(sorted(self._variables),togo,newvalues,self._data)
309 if not keep_class:
310 self.__class__ = Factor
311 return self
312
314 """Alter a factor's B{data} by introducting new values to
315 a variable given by the dictionary C{newvalues}
316
317 @param newvalues: Dictionary of the form {var1:values1,var2:values2..}.
318 Each value of this dictionary must be an iterable.
319 Values in an iterable which were not previously values of the corresponding variable
320 are ignored. Variables which are not used in the factor are also ignored. Upon return
321 each value in C{newvalues.values()} will be a frozenset.
322 @type newvalues: Dictionary
323 @param keep_class: Object will become a L{Factor} if false, otherwise class is unaltered.
324 @type keep_class: Boolean
325 @return: C{self}
326 @rtype: C{Factor}
327 @author: Charles Blundell
328 """
329 if set(newvalues.keys()) - self._variables:
330 raise RuntimeError, 'Use .broadcast, not .data_extend'
331 togo = len(self._variables.intersection(newvalues))
332 before = len(self._data)
333 self._data = self._data_extend(sorted(self._variables),togo,newvalues,self._data)
334 after = len(self._data)
335 assert before <= after
336 if not keep_class:
337 self.__class__ = Factor
338 return self
339
340
342 """Explicit string representation of multiplying C{self} and C{other}
343
344 Resulting factor has strings such as '0.2 * 0.5'
345 instead of numbers as values, so cannot
346 be used for much else apart from displaying.
347
348 @param other: Factor on RHS of multiplication
349 @type other: L{Factor}
350 @return: Factor with strings as values
351 @rtype: L{Factor}
352 """
353 def strmul(x,y):
354 return '%s * %s' % (x,y)
355 return self.copy()._pointwise_op(other,strmul)
356
357 - def differ(self,other,epsilon=0.0):
358 """Whether there are corresponding values in C{self} and C{other} differing by
359 more than C{epsilon}
360
361 @param other: Factor
362 @type other: L{Factor}
363 @param epsilon: Permissible difference
364 @type epsilon: Float
365 @return: Whether they differ
366 @rtype: Boolean
367 @raise ValueError: If the two factors do not have the same number of data values.
368 """
369 od = other._data
370 sd = self._data
371 if len(od) != len(sd):
372 raise ValueError("Factors don't have the same number of data values")
373 for i, x in enumerate(sd):
374 if abs(x-od[i]) > epsilon:
375 return True
376 return False
377
379 """Alter self by dropping C{variable}
380
381 Should only be used on a for a variable
382 which is instantiated.
383
384 @param variable: Variable to drop
385 @type variable: Immutable
386 @param check_instd: Whether to check that it is safe to drop the variables
387 @type check_instd: Boolean
388 @return: The altered C{self}
389 @rtype: Class of C{self}
390 """
391 return self.drop_variables((variable,),check_instd)
392
393
395 """Alter self by dropping C{variables}
396
397 Should only be used for variables
398 which are instantiated.
399
400 C{variables} is not altered. Variables in C{variables}
401 which are not in C{self}'s variables are ignored.
402 @param variables: Variables to drop
403 @type variables: Sequence
404 @param check_instd: Whether to check that it is safe to drop the variables
405 @type check_instd: Boolean
406 @return: The altered C{self}
407 @rtype: Class of C{self}
408 """
409 if (check_instd and not self._instd.issuperset(variables)):
410 raise ValueError("%s not all instantiated. An uninstantiated variable can't be dropped, it must be summed out!" % variables)
411 return SubDomain.drop_variables(self,variables)
412
413
437
439 """Display a GUI widget for displaying a factor
440
441 @param parent: A widget into which the GUI is placed.
442 @type parent: Some suitable Tk object.
443 @param config: Optional extra configuration options to control how
444 GUI widget is displayed in C{parent}.
445 @type config: various
446 """
447 gui = Tkinter.Frame(parent)
448 gui.pack(config)
449 gui_main = self.gui_main(gui,edit=False)
450 gui_main.pack()
451 for (txt,cmd) in [('Done',gui.destroy)]:
452 button = Tkinter.Button(gui,text=txt,command=cmd)
453 button.bind('<Return>', lambda event: cmd())
454 button.pack()
455
456
458 """Display a GUI widget for editing a factor
459
460 @param parent: A widget into which the GUI is placed.
461 @type parent: Some suitable Tk object.
462 """
463 gui = Tkinter.Frame(parent)
464 gui.pack()
465 gui_main, data_indices, tkc_variables = self.gui_main(gui)
466 gui_main.pack()
467 gui_buttons = self.gui_buttons(gui,data_indices,tkc_variables)
468 gui_buttons.pack()
469
470
471 - def gui_main(self,parent,edit=True,varselect=False,**config):
472 """Return a C{Tkinter.Frame} (and possibly other information)
473 for displaying and editing a factor
474
475 If C{edit=True} then also return information needed for subsequent
476 editing
477 @param parent: The Frame's parent widget.
478 @type parent: Some suitable Tk object.
479 @param edit: If set, the Frame will allow editing of the factor's data
480 @type edit: Boolean
481 @param varselect: If set, the user can 'select' variables by clicking on their names
482 @type varselect: Boolean
483 @param config: Other parameters for the frame
484 @type config: Dictionary
485 @return: If C{edit=False} the frame widget. If C{edit=True} a tuple
486 C{(frame,data_indices, tkc_variables)} where the latter two are both identically
487 structured lists of lists; C{data_indices} indexes into the factor's data and
488 C{tkc_variables} contains C{Tkinter.StringVar} objects one for each data point
489 @rtype: Frame or Tuple
490 """
491 gui_main = Tkinter.Frame(parent,borderwidth=2,relief=Tkinter.GROOVE,**config)
492 data_indices = [[i] for i in range(len(self._data))]
493 if varselect:
494 self._gui_header(gui_main,sorted(self._variables),Tkinter.Button)
495 else:
496 self._gui_header(gui_main,sorted(self._variables),Tkinter.Label)
497 if edit:
498 tkc_variables = self._gui_edit_rows(gui_main,self.insts(),data_indices,1)
499 return gui_main, data_indices, tkc_variables
500 else:
501 self._gui_display_rows(gui_main,self.insts(),data_indices,1)
502 return gui_main
503
505 """Return h_score of factor with ESS = C{ess}
506
507 This is the log of the function called H in UAI08 paper.
508
509 Computes \sum_i log(\Gamma(n_i+\alpha)/\Gamma(\alpha))
510 where alpha = ess/(no. of values in self)
511 """
512 from gPyC import lgh
513 return lgh(self._data,ess/len(self._data))
514
515
517 """
518 Increment C{self} with counts directly from C{rawdata}
519
520 OK for parameter fitting not for structure learning
521 @param rawdata: A tuple like that returned by L{IO.read_csv}.
522 @type rawdata: Tuple
523 @raise IndexError: If C{self} has a variable missing from C{rawdata}
524 """
525 variables,records = rawdata[2:]
526 variables_info = []
527 size = len(self._data)
528 for variable in sorted(self._variables):
529 indx = variables.index(variable)
530 step = size/self.numvals(variable)
531 variables_info.append((indx,step))
532 size = step
533 for record in records:
534 self.inc_from_record(variables_info,record)
535
537 """Increment a data value in a factor from a data record
538
539 @param variables_info: A sequence of which each element
540 corresponds to a variable in C{self} (variables in order).
541 Each element is a pair C{(indx,step)} where
542
543 0. C{indx} is the index of the field in C{record} corresponding
544 to the variable
545
546 1. C{step} is the number of data values corresponding to any given
547 value of the variable
548 @type variables_info: Sequence
549 @param record: A data record (such as produced via L{IO.read_csv})
550 @type record: Sequence
551 """
552 i = 0
553 for variable_info in variables_info:
554 i += record[variable_info[0]] * variable_info[1]
555 self._data[i] += record[-1]
556
557
559 """Return instantiations with values
560
561 Returns a tuple of pairs. Each pair is of the form (inst,val)
562 each inst is a tuple giving a joint instantiation of the
563 variables.
564
565 If C{key} is not None, it should be a function which takes an
566 (inst,val) object as input. The (inst,val) pairs are ordered according
567 to the value of this function.
568 For example key = f where def f(x): return x[1] orders by value
569
570 If {key} is None, the order is the default one. Lexicographically on inst.
571
572 @return: Instantiations with values ordered by value
573 @rtype: Tuple
574 """
575 result = []
576 data = self._data
577 result = [(inst,data[i]) for i, inst in enumerate(self.insts())]
578 if keyfn is not None:
579 result.sort(key=keyfn,reverse=True)
580 return tuple(result)
581
582 - def map(self, fn, keep_class=False):
583 """Transform by an arity 1 function.
584 By default, C{self} is forced to a Factor, since rarely
585 will C{fn} result in anything more specific. C{keep_class} controls
586 this behaviour.
587
588 @param fn: An arity 1 function
589 @type fn: function between values of an element of the object (e.g., L{Parameters.Factor} values)
590 @param keep_class: Do not force C{self} to be a L{Parameters.Factor}
591 @type keep_class: Boolean (defaulting to False)
592 @rtype: Same as C{self}
593 @return: Result of applying the function
594 """
595 self._data = map(fn, self._data)
596 if not keep_class:
597 self.__class__ = Factor
598 return self
599
601 """Alter self by marginalising away C{variables}
602
603 C{variables} is not altered. Variables in C{variables}
604 which are not in C{self}'s variables are ignored.
605 @param variables: Variables to marginalise away
606 @type variables: Sequence
607 @return: The altered C{self}
608 @rtype: Class of C{self}
609 """
610 variables = frozenset(variables)
611 self._data = self._data_marginalise(
612 self._data,
613 sorted(self._variables),
614 self._variables & variables)
615 SubDomain.drop_variables(self,variables)
616 self.__class__ = Factor
617 return self
618
619 - def makeCPT(self,child,cpt_check=True):
620 """Construct an L{CPT} object from a factor
621
622 by just naming C{child} as the child variable.
623 Will raise an exception if the factor is not a CPT with C{child}
624 as the child
625 @param child: Variable of form (varname,(varvalue1,varvalue2,..))
626 @type child: tuple
627 @return: The CPT
628 @rtype: L{CPT} object
629 @raise CPTError: if the factor is not a CPT with C{child}
630 """
631 return CPT(self,child,cpt_check)
632
634 """Return a new factor with values proportional to those of self
635 but which sum to one
636
637 @return: Normalised version of C{self}
638 @rtype: Factor
639 """
640 def fn(x): return extdiv(x,float(self.z()))
641 return Factor(self._variables,map(fn,self._data),self)
642
643 - def repr_nodomain(self):
644 """Return string representation but without domain
645
646 @return: String representation but without domain given
647 @rtype: String
648 """
649 return 'Factor(%s,%s)' % (self._variables,self._data)
650
651
653 """Return the sum of the factor's values
654
655 @return: Sum of the factor's values
656 @rtype: float
657 """
658 return reduce(operator.add,self._data)
659
661 """Set all values for a factor to zero
662 """
663 self._data = [0] * len(self._data)
664
665
667 """Broadcast C{data} to make room for C{variables}
668
669 @param data: A sublist of C{self}'s data corresponding to an
670 instantiation of a prefix of C{self}'s ordered variables.
671 @type data: List
672 @param variables: An ordered sequence of variables which is a superset of
673 the suffix of C{self}'s ordered variables
674 @type variables: Sequence
675 @param n_extra: How many variables in C{variables} are not in the remaining
676 suffix of C{self}'s variables
677 @type n_extra: Int
678 @return: Broadcast data
679 @rtype: List
680 @raise KeyError: If C{variables} contains a variable which is not in C{self}'s
681 values dictionary
682 """
683 if n_extra == 0:
684 return data
685 variable, rest = variables[0], variables[1:]
686 if variable in self._variables:
687 chunk_size = len(data)/self._numvals[variable]
688 newdata = []
689 for i in range(0,len(data),chunk_size):
690 newdata.extend(self._data_broadcast(data[i:i+chunk_size],rest,n_extra))
691 return newdata
692 else:
693 return self._data_broadcast(data,rest,n_extra-1) * self._numvals[variable]
694
696 """Sum out variables C{to_sumout} from C{data}
697
698 Variables in C{to_sumout} not in C{variables} have no
699 effect
700 @param data: Data
701 @type data: List
702 @param variables: The (ordered) variables for C{data}
703 @type variables: Sequence
704 @param to_sumout: Variables to sum out
705 @type to_sumout: Set/frozenset
706 @return: Data with C{to_sumout} variables summed out
707 @rtype: List
708 """
709 if not to_sumout:
710 return data
711 if to_sumout.issuperset(variables):
712 return [reduce(operator.add,data)]
713 variable, rest = variables[0], variables[1:]
714 chunk_size = len(data)/self._numvals[variable]
715 if variable in to_sumout:
716 new_to_sumout = to_sumout - set([variable])
717 newdata = self._data_marginalise(data[:chunk_size],
718 rest,
719 new_to_sumout)
720 for i in range(chunk_size,len(data),chunk_size):
721 newdata = map(operator.add,
722 newdata,
723 self._data_marginalise(data[i:i+chunk_size],
724 rest,
725 new_to_sumout))
726 else:
727 newdata = []
728 for i in range(0,len(data),chunk_size):
729 newdata.extend(
730 self._data_marginalise(data[i:i+chunk_size],rest,to_sumout))
731 return newdata
732
733
735 """Alter C{data} to effect the restriction given by C{newvalues}
736
737 @param variables: The (sorted) variables for C{data}
738 @type variables: Sequence
739 @param togo: How many restrictions remain to be done
740 @type togo: Int
741 @param newvalues: Mapping from some variables to a subset of their original values
742 @type newvalues: Dictionary
743 @param data: The data to be restricted
744 @type data: List
745 @return: The data with the restriction given by C{newvalues} effected
746 @rtype: List
747 """
748 if togo == 0:
749 return data
750 variable, rest = variables[0], variables[1:]
751 chunk_size = len(data)/self._numvals[variable]
752 newdata = []
753 if variable in newvalues:
754 test = False
755 togo -= 1
756 these_newvalues = frozenset(newvalues[variable])
757 else:
758 test = True
759 for i, value in enumerate(sorted(self._domain[variable])):
760 if test or value in these_newvalues:
761 newdata.extend(
762 self._data_restrict(rest,togo,
763 newvalues,data[i*chunk_size:(i+1)*chunk_size]))
764 return newdata
765
767 """Alter C{data} to effect the extension given by C{newvalues}
768
769 @param variables: The (sorted) variables for C{data}
770 @type variables: Sequence
771 @param togo: How many extensions remain to be done
772 @type togo: Int
773 @param newvalues: Mapping from some variables to a subset of their original values
774 @type newvalues: Dictionary
775 @param data: The data to be restricted
776 @type data: List
777 @return: The data with the restriction given by C{newvalues} effected
778 @rtype: List
779 @author: Charles Blundell
780 """
781 if togo == 0:
782 return data
783 variable, rest = variables[0], variables[1:]
784 chunk_size = len(data)/self._numvals[variable]
785 newdata = []
786 old_values = sorted(self._domain[variable])
787 if variable not in newvalues:
788 for i, value in enumerate(old_values):
789 chunk = data[i*chunk_size:(i+1)*chunk_size]
790 newdata.extend(self._data_extend(rest,togo,newvalues,chunk))
791 return newdata
792
793 togo -= 1
794 values = sorted(newvalues[variable])
795 for value in values:
796 if value in self._domain[variable]:
797 i = old_values.index(value)
798 chunk = data[i*chunk_size:(i+1)*chunk_size]
799 else:
800 chunk = [0]*chunk_size
801
802 newdata.extend(self._data_extend(rest,togo,newvalues,chunk))
803
804 return newdata
805
806
807
808
810 """Apply binary operation C{op} to the factors C{self} and C{other}
811
812 Copies of C{self} and C{other} are 'broadcast' as necessary
813 C{self} is altered to contain the result.
814 C{self} and C{other} will have identical values dictionaries after
815 this method has been executed (Normally they already are identical).
816 @param other: Factor on the RHS of the operation
817 @type other: L{Factor}
818 @param op: A binary operation which takes numeric arguments
819 @type op: Function object
820 @raise VariableError: If C{self} and C{other} use a variable with different values
821 in each one's values dictionary.
822 """
823 result_variables = self._get_result_variables(other)
824 n_self_extra = len(result_variables - self._variables)
825 n_other_extra = len(result_variables - other._variables)
826 ordered_result_variables = sorted(result_variables)
827 self._data = map(
828 op,
829 self._data_broadcast(self._data,ordered_result_variables,n_self_extra),
830 other._data_broadcast(other._data,ordered_result_variables,n_other_extra)
831 )
832 self._variables = result_variables
833
834
836 """Add rows of joint instantiations and corresponding data
837 values for each to a factor's main GUI
838
839 @param gui_main: The GUI into which the rows will be added
840 @type gui_main: C{Tkinter.Frame}
841 @param insts: The joint instantiations (one for each row)
842 @type insts: Sequence (of sequences of strings)
843 @param data_indices: list of lists of indexes into a data list
844 @type data_indices: List
845 @param hrows: Number of header rows, and thus the
846 vertical offset for these rows
847 @type hrows: Int
848 """
849 for r, inst in enumerate(insts):
850 r2 = r + hrows
851 if inst:
852 for c, value in enumerate(inst):
853 Tkinter.Label(gui_main,text=value,relief=Tkinter.GROOVE,
854 justify=Tkinter.LEFT).grid(row=r2,column=c,sticky=Tkinter.NSEW)
855 c += 1
856 else:
857 Tkinter.Label(gui_main,text='',relief=Tkinter.GROOVE,
858 justify=Tkinter.LEFT).grid(row=r2,column=0,sticky=Tkinter.NSEW)
859 c = 1
860 for i, data_index in enumerate(data_indices[r]):
861 Tkinter.Label(gui_main,text=str(self._data[data_index]),
862 relief=Tkinter.RAISED).grid(row=r2,column=c+i,sticky=Tkinter.NSEW)
863
865 """Add rows of joint instantiations and data entry widgets
866
867 Initialises widgets to show current data values
868 @param gui_main: The GUI into which the rows will be added
869 @type gui_main: C{Tkinter.Frame}
870 @param insts: The joint instantiations (one for each row)
871 @type insts: Sequence (of sequences of strings)
872 @param data_indices: list of lists of indexes into a data list
873 @type data_indices: List
874 @param hrows: Number of header rows in C{gui_main}, and thus the
875 vertical offset for these rows
876 @type hrows: Int
877 @return: List of lists of C{Tkinter.StringVar} objects corresponding
878 to C{data_indices}
879 @rtype: List
880 """
881 tkc_variables = []
882 for r, inst in enumerate(insts):
883 r2 = r + hrows
884 if inst:
885 for c, value in enumerate(inst):
886 Tkinter.Label(gui_main,text=value,relief=Tkinter.GROOVE,
887 justify=Tkinter.LEFT).grid(row=r2,column=c,sticky=Tkinter.NSEW)
888 c += 1
889 else:
890 Tkinter.Label(gui_main,text='',relief=Tkinter.GROOVE,
891 justify=Tkinter.LEFT).grid(row=r2,column=0,sticky=Tkinter.NSEW)
892 c = 1
893 tkc_variables_row = []
894 for i, data_index in enumerate(data_indices[r]):
895 var = Tkinter.StringVar()
896 Tkinter.Entry(gui_main,textvariable=var,
897 width=entry_width).grid(row=r2,column=c+i,sticky=Tkinter.NSEW)
898 var.set(str(self._data[data_index]))
899 tkc_variables_row.append(var)
900 tkc_variables.append(tkc_variables_row)
901 return tkc_variables
902
904 """Add button or label headers to a factor's main GUI window
905
906 Buttons are bound to the L{_gui_select} method.
907 @param gui_main: The main GUI window to which buttons/labels are to be added
908 @type gui_main: C{Tkinter.Frame}
909 @param variables: Sequence of strings providing text for the labels/buttons
910 @type variables: Sequence
911 @param widget_type: Either Tkinter.Button or Tkinter.Label
912 @type widget_type: Tkinter.Button or Tkinter.Label Tkinter class object
913 @param firstcol: The column into which the first label/button goes
914 @type firstcol: Int
915 @param r: The row for the buttons/labels
916 @type r: Int
917 @param span: The column span of each button/label
918 @type span: Int
919 """
920 widgets = []
921 for c, variable in enumerate(variables):
922 widget = widget_type(gui_main,text=variable,
923 relief=Tkinter.RAISED,justify=Tkinter.LEFT)
924 widget.grid(row=r,column=c+firstcol,columnspan=span,sticky=Tkinter.NSEW)
925 widgets.append(widget)
926 if widget_type == Tkinter.Button:
927 for button in widgets:
928 button.config(command=lambda b=button: self._gui_select(b))
929
930 @staticmethod
931
933 """Change the appearance of a button
934
935 @param button: The button
936 @type button: C{Tkinter.Button}
937 """
938 if button['relief'] == Tkinter.RAISED:
939 button.config(relief=Tkinter.SUNKEN,fg='red',activeforeground='red')
940 else:
941 button.config(relief=Tkinter.RAISED,fg='black',activeforeground='black')
942
943 @staticmethod
944
946 """Clear the a list of lists of Entry widgets
947
948 @param tkc_variables: List of lists of C{Tkinter.StringVar} objects,
949 typically corresponding to factor data indices
950 @type tkc_variables: List
951
952 """
953 for tkc_variables_row in tkc_variables:
954 for var in tkc_variables_row:
955 var.set('')
956
958 """Change a factor's data to be that contained in a list of lists of
959 C{Tkinter.StringVar} objects and destroy the window C{win}
960
961 @param win: Window to destroy
962 @type win: Suitable Tkinter object
963 @param data_indices: list of lists of indexes into a data list
964 @type data_indices: List
965 @param tkc_variables: List of lists of C{Tkinter.StringVar} objects corresponding
966 to C{data_indices}
967 @type tkc_variables: List
968
969 """
970 self._gui_tkc_get(data_indices,tkc_variables)
971 win.destroy()
972
974 """Change a factor's data to be that contained in a list of lists of
975 C{Tkinter.StringVar} objects
976
977 @param data_indices: list of lists of indexes into a data list
978 @type data_indices: List
979 @param tkc_variables: List of lists of C{Tkinter.StringVar} objects corresponding
980 to C{data_indices}
981 @type tkc_variables: List
982
983 """
984 for r, tkc_variables_row in enumerate(tkc_variables):
985 data_indices2 = data_indices[r]
986 for i, var in enumerate(tkc_variables_row):
987 self._data[data_indices2[i]] = float(var.get())
988
990 """Set the data displayed by a list of lists of
991 C{Tkinter.StringVar} objects to be the factor's
992 data
993
994 @param data_indices: list of lists of indexes into a data list
995 @type data_indices: List
996 @param tkc_variables: List of lists of C{Tkinter.StringVar} objects corresponding
997 to C{data_indices}
998 @type tkc_variables: List
999
1000 """
1001 for r, tkc_variables_row in enumerate(tkc_variables):
1002 data_indices = data_indices[r]
1003 for i, var in enumerate(tkc_variables_row):
1004 var.set(str(self._data[data_indices[i]]))
1005
1007 """Return a format string and string of dashes suitable for creating a
1008 header for rows of instantiations of C{variables}
1009
1010 @param variables: Variables for which a header is sought. If C{None}
1011 then C{self}'s variables are used.
1012 @type variables: (Ordered) sequence
1013 @return: The format string and string of dashes
1014 @rtype: Tuple
1015 """
1016 if variables is None:
1017 variables = sorted(self._variables)
1018 fmt = dashes = ''
1019 def lenstr(x): return len(str(x))
1020 for varname in variables:
1021 width = max(len(str(varname)),max(map(lenstr,self.values(varname))))
1022 fmt += '%%-%ss%s' % (width,sep)
1023 dashes += '-' * width + sep
1024 return fmt, dashes
1025
1026
1028 """Apply binary operation C{op} to the factor C{self} and C{other}.
1029 C{self} is altered to contain the result
1030
1031 After this operation has been applied, C{self} is always set to have
1032 class L{Factor}, since if it were originally a L{CPT} there is no guarantee
1033 that it still is one.
1034
1035 C{other} may be a factor or a scalar
1036 @param other: Factor on the RHS of the operation
1037 @type other: L{Factor}, Float or Int
1038 @param op: A binary operation which takes numeric arguments
1039 @type op: Function object
1040 @param swapped: If set and C{other} is a number return
1041 C{other op self}
1042 @type swapped: Boolean
1043 @return: C{self}
1044 @rtype: L{Factor}
1045 """
1046 if isinstance(other,(float,int)):
1047 if swapped:
1048 def fn(x): return op(other,x)
1049 else:
1050 def fn(x): return op(x,other)
1051 self._data = map(fn,self._data)
1052 else:
1053 self._factor_pointwise_op(other,op)
1054 self.__class__ = Factor
1055 return self
1056
1057
1058
1059
1060 -class CPT(Factor):
1061 """Conditional probability tables"""
1062
1063 - def __init__(self,factor,child,
1064 cpt_check=False,cpt_force=False,allow_dummies=False):
1065 """Initialise an L{CPT} object
1066
1067 @param factor: The CPT as a L{Factor}
1068 @type factor: L{Factor} object
1069 @param child: The child of the CPT
1070 @param cpt_check: Whether to check that this factor is indeed
1071 a CPT with C{child} as child
1072 @type cpt_check: Boolean
1073 @param cpt_force: Whether to convert the factor into a CPT
1074 by normalisation
1075 @type cpt_force: Boolean
1076 @param allow_dummies: If C{cpt_force=True}, whether to put a dummy row
1077 when when normalisation produces a division by zero.
1078 @type allow_dummies: Boolean
1079 @raise ZeroDivisionError: If C{cpt_force=True, allow_dummies=False}
1080 and normalisation produces a division by zero
1081 """
1082 self.__dict__ = factor.__dict__
1083 self._child = child
1084 if cpt_check and child not in self._variables:
1085 raise CPTError(
1086 "Child %s missing from %s" % (child,self._variables))
1087 if cpt_check or cpt_force:
1088 for data_indices in self.parent_insts_indices():
1089 prob_sum = 0.0
1090 for indx in data_indices:
1091 prob_sum += self._data[indx]
1092 if abs(1-prob_sum) > epsilon:
1093 if cpt_force:
1094 if allow_dummies and not prob_sum > 0:
1095 for indx in data_indices:
1096 self._data[indx] = 0.0
1097 else:
1098 for indx in data_indices:
1099 self._data[indx] /= prob_sum
1100 else:
1101 errmsg = '\nFor child:\t%s\n' % child
1102
1103 errmsg += 'For row:\t%s\n' % data_indices
1104 errmsg += 'Sum was:\t%4.2f (should be 1.0)\n' % prob_sum
1105
1106 raise CPTError(errmsg)
1107
1109
1110
1111 if (isinstance(inst,(tuple,list,dict)) and
1112 len(inst) == len(self._variables) - 1):
1113 if isinstance(inst,dict):
1114 inst = [inst[name] for name in sorted(self.parents())]
1115 inst = tuple(inst)
1116 it = self.parent_insts_data()
1117 for pi in self.parent_insts():
1118 data = it.next()
1119 if pi == inst:
1120 return CPT(Factor([self._child],data,self),self._child)
1121 else:
1122 return Factor.__getitem__(self,inst)
1123
1125 return 'CPT(%s,%s)' % (Factor.__repr__(self),
1126 repr(self._child))
1127
1129 """
1130 Pretty representation of a CPT
1131
1132 @return: Pretty representation of a CPT
1133 @rtype: String
1134 """
1135 parents = sorted(self._variables - set([self._child]))
1136 fmt, dashes = self._header(parents)
1137 rwidth = 0
1138 rfmt = childvalheader = ''
1139 for childvalue in sorted(self._domain[self._child]):
1140 width = max(6,len(str(childvalue))+2)
1141 childvalheader += '%*s' % (width,childvalue)
1142 dashes += '-' * width
1143 rfmt += '%%%d.%df' % (width,precision)
1144 rwidth += width
1145 rfmt += '\n'
1146 out = ('\n'
1147 + (fmt % tuple(parents)) + str(self._child).center(rwidth) + '\n'
1148 + (fmt % tuple([''] * len(parents))) + childvalheader + '\n'
1149 + dashes + '\n')
1150 itr = self.parent_insts_indices()
1151 for parent_inst in self.parent_insts():
1152 out += fmt % parent_inst
1153 out += rfmt % tuple([self._data[i] for i in itr.next()])
1154 return out
1155
1157 """Return the BDeu component score corresponding to C{self}
1158
1159 This is the score for the family represented by the parents and child
1160 of C{self} and where the values of C{self} are interpreted as data counts for the
1161 relevant instantiations.
1162
1163 The BDeu score for a Bayesian network is the sum of component scores for each of its CPTs.
1164
1165 @param precision: The prior precision used to set the Dirichlet parameters
1166 @type precision: Float
1167 @return: BDeu score for family
1168 @rtype: Float
1169 @raise NameError: If L{gPyC} was not successfully imported
1170 """
1171 return gPyC.family_bdeu(self._getinterval(),self._numvals[self._child],
1172 self._data,precision)
1173
1174
1176 """Return the child in a CPT
1177
1178 @return: The child in a CPT
1179 @rtype: Immutable (usually String)
1180 """
1181 return self._child
1182
1183 - def copy(self,copy_domain=False):
1185
1189
1191 """
1192 Initialise the CPT for future sampling
1193
1194 Should also be called if C{self} has been altered since
1195 this method has last been called.
1196 """
1197 from Samplers import MultinomialSampler
1198 samplers = {}
1199 values = sorted(self._domain[self._child])
1200 itr = self.parent_insts_data()
1201 for parent_inst in self.parent_insts():
1202 probs = itr.next()
1203 samplers[tuple(parent_inst)] = MultinomialSampler(dict(zip(values,probs)))
1204 self._samplers = samplers
1205
1207 """Return a 'CPT' with same structure as C{self} but with appropriate
1208 counts from C{data} as its values
1209
1210 @param data: The data where the counts are
1211 @type data: L{CompactFactor}
1212 @return: A 'CPT' with counts from C{data} as values
1213 @rtype: L{CPT}
1214 """
1215 return data.makeFactor(self._variables).makeCPT(self._child,cpt_check=False)
1216
1218 """Return a 'CPT' with same structure as C{self} but with appropriate
1219 counts from C{data} as its values
1220
1221 @param data: The data where the counts are
1222 @type data: L{SqliteFactor}
1223 @return: A 'CPT' with counts from C{data} as values
1224 @rtype: L{CPT}
1225 """
1226 return data.makeFactor(self._variables).makeCPT(self._child,cpt_check=False)
1227
1228
1236
1237 - def gui_main(self,parent,edit=True,varselect=False,**config):
1238 gui_main = Tkinter.Frame(parent,borderwidth=2,relief=Tkinter.GROOVE,**config)
1239 data_indices = [data_row for data_row in self.parent_insts_indices()]
1240 if varselect:
1241 widget_type = Tkinter.Button
1242 else:
1243 widget_type = Tkinter.Label
1244 parents = sorted(self.parents())
1245 if not parents:
1246 parents = ['']
1247 self._gui_header(gui_main,parents,widget_type)
1248 self._gui_header(gui_main,[self._child],widget_type,firstcol=len(parents),
1249 r=0,span=self._numvals[self._child])
1250 self._gui_header(gui_main,sorted(self.values(self._child)),
1251 Tkinter.Label,firstcol=len(parents),
1252 r=1)
1253 if edit:
1254 tkc_variables = self._gui_edit_rows(gui_main,self.parent_insts(),data_indices,2)
1255 return gui_main, data_indices, tkc_variables
1256 else:
1257 self._gui_display_rows(gui_main,self.parent_insts(),data_indices,2)
1258 return gui_main
1259
1261 """Return the K2 component score corresponding to C{self}
1262
1263 The score used for Cooper and Herskovits's K2 algorithm.
1264
1265 The K2 score for a Bayesian network is the sum of component scores for each of its CPTs.
1266
1267 @return: K2 component score
1268 @rtype: Float
1269 @raise NameError: If L{gPyC} was not successfully imported
1270 """
1271
1272 return gPyC.family_llh(self._getinterval(),self._numvals[self._child],
1273 self._data,[1.0]*len(self._data))
1274
1275 - def llh(self,priors):
1276 """Return the marginal log-likelihood component score corresponding to C{self}
1277
1278 This is the marginal log-likelihood component score for the
1279 family represented by the parents and child of C{self} and
1280 where the values of C{self} are interpreted as data counts for
1281 the relevant instantiations.
1282
1283 Since there are no constraints on C{priors} likelihood equivalence may not obtain and so
1284 this score need not be a BDe score.
1285
1286 The marginal log-likelihood for a Bayesian network is the sum of component scores for each of its CPTs.
1287
1288 @param priors: A L{CPT} object with the same structure as C{self}
1289 where each value is the Dirichlet parameter for its corresponding instantiation.
1290 @type priors: L{CPT} object
1291 @return: Marginal log-likelihood component score
1292 @rtype: Float
1293 @raise NameError: If L{gPyC} was not successfully imported
1294 """
1295 return gPyC.family_llh(self._getinterval(),self._numvals[self._child],
1296 self._data,priors._data)
1297
1299 """Return the parents in a CPT
1300
1301 @return: The parents in a CPT
1302 @rtype: Frozenset
1303 """
1304 return self._variables - set([self._child])
1305
1307 """Return an iterator over the joint instantiations of the parents
1308 in a CPT
1309
1310 @return: Iterator over parent instantiations
1311 @rtype: Iterator
1312 """
1313 return self.insts(sorted(self._variables - set([self._child])))
1314
1316 """Return an iterator where each iteration is the
1317 data corresponding to the corresponding parent instantiation
1318
1319 @return: Iterator over rows of the data
1320 @rtype: Iterator
1321 """
1322 for indices in self.parent_insts_indices():
1323 yield [self._data[i] for i in indices]
1324
1326 """Return an iterator where each iteration is a list of
1327 data indices corresponding to the corresponding parent instantiation
1328
1329 @return: Iterator over data indices
1330 @rtype: Iterator
1331 """
1332 interval = self._getinterval()
1333 step = interval * self._numvals[self._child]
1334 for start in range(0,len(self._data),step):
1335 for j in range(start,start+interval,1):
1336 yield range(j,j+step,interval)
1337
1339 if list(variables) == [self._child]:
1340 self._data = [1.0] * (len(self._data)/self._numvals[self._child])
1341 self._variables = self._variables - set([self._child])
1342 del self._child
1343 self.__class__ = Factor
1344 else:
1345 Factor.marginalise_away(self,variables)
1346 return self
1347
1348 - def repr_nodomain(self):
1349 """Return string representation but without domain
1350
1351 @return: String representation but without domain given
1352 @rtype: String
1353 """
1354 return 'CPT(%s,%s)' % (Factor.repr_nodomain(self),repr(self._child))
1355
1356 - def sample(self,parent_inst):
1357 """
1358 Sample a child value conditional on parent values
1359
1360 L{initialise_sampler} must have been called since C{self}
1361 was created or last altered.
1362
1363 @param parent_inst: An instantiation of the parents (in
1364 standard order) as a tuple of values.
1365 @type parent_inst: Tuple
1366 @return: A value of the child
1367 @rtype: Somthing immutable, usually a string
1368 @raise AttributeError: If L{initialise_sampler} has not been
1369 previously called.
1370 """
1371 return self._samplers[parent_inst].sample()
1372
1374 """Return how far apart child values (for a given
1375 parent configuration) are in C{self}'s data
1376 """
1377 interval = len(self._data)
1378 for variable in sorted(self._variables):
1379 interval /= self._numvals[variable]
1380 if variable == self._child:
1381 return interval
1382
1383 @staticmethod
1384
1386 """Normalise rows for a CPT GUI
1387
1388 @param tkc_variables: List of lists of C{Tkinter.StringVar} objects,
1389 typically corresponding to factor data indices
1390 @type tkc_variables: List
1391 """
1392 for tkc_variables_row in tkc_variables:
1393 total = 0.0
1394 nums = []
1395 for var in tkc_variables_row:
1396 num = float(var.get())
1397 total += num
1398 nums.append(num)
1399 for i, var in enumerate(tkc_variables_row):
1400 var.set(str(nums[i]/total))
1401