mathDict Example

mathDict_example.py Source:

from ParallelRegression import *

A = [457.641, 269.473, 666.114, 738.578, 384.412, 436.901, 616.734,
     800.865, 799.396, 338.704, 647.796, 530.703, 464.395, 580.856,
     740.477, 262.685, 902.082, 455.818, 260.115, 736.013, 506.098,
     752.046, 789.867, 643.288, 637.688, 735.963, 366.387, 341.807,
     302.911, 498.582, 813.982, 814.821, 420.031, 448.213, 307.052,
     430.88,  416.088, 845.837, 410.257, 386.846, 910.287, 824.109,
     391.376, 632.234, 620.801, 395.589, 777.192, 613.002, 442.574,
     534.007, 845.773, 365.875, 357.484, 465.222, 598.116, 546.189,
     387.921, 796.199, 660.886, 504.647, 334.781, 426.913, 460.404,
     835.071, 704.785, 799.473, 362.1,   493.072, 457.163, 819.701,
     619.863, 691.317, 385.449, 451.347, 336.794, 372.412, 453.459,
     419.277, 416.966, 772.945, 943.82,  520.426, 345.733, 411.217,
     740.681, 682.992, 719.678, 727.342, 597.919, 512.705, 593.008,
     588.302, 690.788, 908.377, 704.167, 698.847, 433.792, 453.611,
     526.038, 382.376]

B = [119.301,  66.349, 161.152, 145.175,  69.001, 219.116,  94.441,
     213.002,  92.834, 100.679, 164.569,  80.688,  92.686, 219.442,
     221.164,  43.919, 194.728, 213.813,  52.219, 154.47,  135.639,
     186.396, 222.365, 137.722,  82.308, 126.674, 163.315,  52.624,
     100.181, 213.997, 181.496, 164.274, 124.197, 195.611,  26.147,
     164.143, 155.979, 126.052, 169.598, 190.142, 234.881, 138.879,
     141.201, 111.1,   157.582, 134.78,  162.129, 165.268, 233.181,
     110.312, 197.514,  54.899, 133.201, 235.463,  37.009, 237.319,
     136.888, 174.981, 143.82,  130.934, 110.247, 189.622, 203.057,
     265.556, 209.429, 127.223, 126.889, 180.801, 201.507, 199.916,
     111.492, 117.431, 108.005, 171.611, 117.93,  179.91,  222.877,
     197.046, 256.175, 153.089, 220.919, 219.835, 101.607, 156.517,
     186.866, 142.049, 189.638, 180.149, 108.815, 217.281, 111.797,
     176.722, 194.262, 227.524,  71.405, 168.751, 180.718, 159.217,
     214.491, 160.46]

C = [199.38,  111.518, 171.771, 195.467, 149.69,  129.294, 199.866,
     162.954, 250.934, 144.716,  75.313, 113.178, 173.147, 176.945,
      70.004, 164.992, 240.852, 193.629, 175.825, 164.11,  209.412,
      87.323,  78.069, 120.363, 189.156, 252.551, 120.92,  216.863,
     130.409, 244.084, 169.927, 134.425, 109.229, 126.777, 100.834,
      92.531, 183.025, 274.818, 199.981, 169.116, 208.509, 249.877,
     146.664, 203.326, 131.02,  119.461, 144.568, 182.734, 219.753,
     154.387, 123.408, 167.475, 145.907,  94.822, 235.145,  62.2,
     157.767, 234.263, 115.903,  70.69,  145.207, 166.503, 179.489,
     129.545, 70.273,  192.28,  188.573, 191.433, 217.216, 186.478,
     172.187, 124.913, 100.57,  261.621, 122.669, 218.585,  90.753,
     163.686, 144.649, 205.134, 234.896, 261.634, 222.179, 215.568,
      83.608, 183.83,  103.642,  89.629, 205.168, 208.448, 209.43,
     149.044,  81.287, 231.098, 147.611,  31.635, 143.063, 234.67,
      87.556, 166.715]

D = [195.67,  129.138, 156.052, 109.087, 153.751, 127.445, 120.588,
     131.854, 173.856, 133.698, 158.244, 150.328, 176.716, 137.254,
     172.858, 122.604, 172.383, 161.954, 130.858, 175.53,  166.751,
     113.442, 124.344, 130.758, 149.935, 132.201, 113.225, 128.479,
     128.796, 173.129, 154.809, 192.431, 174.448, 223.659, 173.727,
     143.076, 170.036, 120.376, 125.871, 115.621, 185.208, 154.787,
     158.684, 150.494, 112.307, 145.116, 148.797, 100.41,  133.531,
     109.915, 183.138, 167.564, 160.118, 146.176, 166.684, 168.459,
     163.29,  135.122, 133.735, 127.535, 111.317, 134.029, 190.334,
     116.149, 118.005, 147.545, 107.594, 183.479, 127.041, 161.095,
     131.097, 178.309, 164.199, 123.923, 124.991, 121.905, 130.822,
     100.141, 103.182, 138.563, 145.071, 168.989, 143.085, 141.522,
     149.141, 138.302, 151.767, 162.769, 137.202, 140.127, 142.806,
      78.273, 128.285, 157.013, 141.791, 142.812, 139.937, 174.723,
     146.152, 133.782]

d1 = [0,0,1,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,1,
      1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,0,0,
      0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,0,1,1,1,
      1,1,1,0,0,0,0]

if __name__ == "__main__":
    mdMaker = mathDictMaker( )
    mdMaker['A']  = A
    mdMaker['B']  = B
    mdMaker['C']  = C
    mdMaker['D']  = D
    mdMaker['d1'] = d1
    SharedDataArray, mDict = mdMaker.make( cache_crossproducts=True,
                                           cache_powers=2 )

QueueWorker and QueueWorkerHypothesis can be used interchangeably in the line starting ‘p = Process’ to try the different examples:

from worker import QueueWorker, QueueWorkerHypothesis
from multiprocessing import Queue, Process
ProcessQueue = Queue( )
ReturnQueue = Queue( )
procList = list( )
for i in range( 2 ):
    p = Process( target=QueueWorkerHypothesis,
                 args=(ProcessQueue,
                       SharedDataArray,
                       ReturnQueue)
                 )
    p.start( )
    procList.append( p )

mDictCfg = mDict.config_to_dict( )
for let in {'A', 'B', 'C', 'D'}:
    mDictNew = mDictCfg.rebuild( SharedDataArray )
    mDictNew.set_mask( let )
    tpl = (mDictNew.config_to_dict( ), let)
    ProcessQueue.put( tpl )

for i in range( len( procList ) ):
    ProcessQueue.put( 'Terminate.' )

terminationCount = 0
while terminationCount < len( procList ):
    QueueObject = ReturnQueue.get( )
    if QueueObject == 'Terminated.':
        terminationCount += 1
    print( QueueObject )

worker.py Source:

from ParallelRegression import *

d2 = [0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,
      1,0,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,1,0,1,1,0,0,1,1,0,0,1,0,
      1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,
      0,0,0,1,0,1,0]

import statsmodels.api
def QueueWorker( ProcessQueue, SharedDataArray, ReturnQueue ):
    QueueObject = ProcessQueue.get( )
    while QueueObject != 'Terminate.':
        mDictCfg, let = QueueObject
        mDict = mDictCfg.rebuild( SharedDataArray )
        mDict['d2'] = d2
        model = statsmodels.api.OLS( mDict[let], mDict[:]
                                    ).fit( cov_type='HC0' )
        ret = ' + '.join( mDict.columns ) + ' => ' + \
              ' + '.join( [str( p ) for p in model.params] )
        ReturnQueue.put( ret )
        QueueObject = ProcessQueue.get( )
    ReturnQueue.put( 'Terminated.' )

def QueueWorkerHypothesis( ProcessQueue, SharedDataArray, ReturnQueue ):
    mapLHS_RHS = {'A': 'B', 'B': 'C', 'C': 'D', 'D': 'A'}
    QueueObject = ProcessQueue.get( )
    while QueueObject != 'Terminate.':
        mDictCfg, let = QueueObject
        mDict = mDictCfg.rebuild( SharedDataArray )
        mDict['d2'] = d2
        mDict.set_mask( 'd2' )
        mDict.hypothesis.add( 'd2' )
        mDict.hypothesis.add( 'd2 * %s' % mapLHS_RHS[let] )
        mDict.hypothesis.add( '%s ** 2' % mapLHS_RHS[let] )
        X, R, r = mDict.hypothesis.make( )
        model = statsmodels.api.OLS( mDict[let], X ).fit( cov_type='HC0' )
        u = model.resid
        beta = model.params
        F_stat = FStatistic( X, u, beta, R, r )
        ret = 'Hypothesis that in modeling %s, columns: `d2`, `d2 * %s`' \
            ', and `%s ** 2` are all 0 has an F statistic of %.3f.' \
            % (let, mapLHS_RHS[let], mapLHS_RHS[let], F_stat)
        ReturnQueue.put( ret )
        QueueObject = ProcessQueue.get( )
    ReturnQueue.put( 'Terminated.' )

QueueWorker Output:

Intercept + A + B + C + d1 + d2 => 112.685540322 + 0.223817620334 + -0.249513977434 + -0.106076169993 + -71.4369309379 + -7.38778376659
Intercept + B + C + D + d1 + d2 => -54.5647897751 + 1.12424317191 + 0.530330297026 + 1.36579120342 + 311.067932614 + 18.1086372194
Intercept + A + C + D + d1 + d2 => 95.7100429924 + 0.508495046485 + -0.307037597337 + -0.688670348879 + -158.160160309 + -0.492666217004
Terminated.
Intercept + A + B + D + d1 + d2 => 153.25155005 + 0.449976021676 + -0.575980823121 + -0.54922563694 + -142.118879576 + -14.5960961384
Terminated.

QueueWorkerHypothesis Output:

Hypothesis that in modeling D, columns: `d2`, `d2 * A`, and `A ** 2` are all 0 has an F statistic of 3.450.
Hypothesis that in modeling A, columns: `d2`, `d2 * B`, and `B ** 2` are all 0 has an F statistic of 2.998.
Hypothesis that in modeling C, columns: `d2`, `d2 * D`, and `D ** 2` are all 0 has an F statistic of 2.292.
Hypothesis that in modeling B, columns: `d2`, `d2 * C`, and `C ** 2` are all 0 has an F statistic of 0.097.
Terminated.
Terminated.