Logo Search packages:      
Sourcecode: ibutils version File versions  Download package

Congestion.cpp

/*
 * Copyright (c) 2004 Mellanox Technologies LTD. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

#include "Fabric.h"
#include "SubnMgt.h"
#include "TraceRoute.h"
#include <iomanip>
#include <fstream>
#include <sstream>
#include <map>

using namespace std;

/*
 * This file provides an API for analyzing congestion on the fabric
 * Link over subscription statistics are being provided as follows:
 *
 * CongInit(p_fabric) - initialize the data structures
 * CongZero(p_fabric) - zero out the paths per link counter
 * CongTrackPath(p_fabric, srcLid, dstLid) - trace one path
 * CongReport(p_fabric, stream &out) - report histogram
 * CongDump(p_fabric, fileName) - dump out the paths
 * CongCleanup(p_fabric) - cleanup
 *
 * The implemented algorithm is based on availablity of the FDBs
 */

// the main data structure we keeo is the list if paths going out
// on every out port of every node.
typedef list< pair< uint16_t, uint16_t> > list_src_dst;
typedef map< IBPort *, list_src_dst, less<IBPort * > > map_pport_paths;
typedef map< IBPort *, int , less<IBPort * > > map_pport_int;
typedef map< int, float > map_int_float;

// for each fabric we keep:
class CongFabricData {
public:
   map_pport_paths portPaths;
      map_pport_int   portNumPaths;
   long            numPaths;
   int             stageWorstCase;
   int             worstWorstCase;
   list<int>       stageWorstCases;
   vec_int         numPathsHist;
   IBPort         *p_worstPort;
      int             maxRank;
   CongFabricData() {
      stageWorstCase = 0;
      worstWorstCase = 0;
      p_worstPort = NULL;
      numPaths = 0;
            maxRank = 0;
   };
};

typedef map< IBFabric*, CongFabricData, less< IBFabric *> > map_pfabric_cong;

map_pfabric_cong CongFabrics;

// Initialize the congestion tracking for the fabric.
int  CongInit(IBFabric *p_fabric)
{
   if (CongFabrics.find(p_fabric) != CongFabrics.end())
   {
      cout << "-E- Congestion Tracker already initialized" << endl;
      return(1);
   }

   CongFabrics[p_fabric] = CongFabricData();
   map_pport_paths &portPaths = CongFabrics[p_fabric].portPaths;

   // init the vector for each connected port of the fabric:
   IBNode *p_node;
   IBPort *p_port;
   for( map_str_pnode::iterator nI = p_fabric->NodeByName.begin();
        nI != p_fabric->NodeByName.end();
        nI++)
   {
      p_node = (*nI).second;
            if (p_node->rank > CongFabrics[p_fabric].maxRank)
                  CongFabrics[p_fabric].maxRank = p_node->rank;

      for( unsigned int pn = 1; pn <= p_node->numPorts; pn++)
      {
         p_port = p_node->getPort(pn);
         if (!p_port || ! p_port->p_remotePort) continue;

         portPaths[p_port] = list_src_dst();
      }
   }
   return(0);
}

// Cleanup...
int
CongCleanup(IBFabric *p_fabric)
{
   // get the reference to the actual data structure:
   map_pfabric_cong::iterator cI = CongFabrics.find(p_fabric);
   if (cI == CongFabrics.end())
   {
      cout << "-E- Congestion Tracker not previously initialized" << endl;
      return(1);
   }

   // remove the entry should delete the entire structure:
   CongFabrics.erase(cI);
   return(0);
}

// analyze a single stage. We propagte a floating point fraction of
// link BW with each src,dst pair such that cong apear only on first
// link a contention run through

int
CongZero(IBFabric *p_fabric)
{
      map_int_float dst_frac;
      int going_up = 1;

   // get the reference to the actual data structure:
   map_pfabric_cong::iterator cI = CongFabrics.find(p_fabric);
   if (cI == CongFabrics.end())
   {
      cout << "-E- Congestion Tracker not previously initialized." << endl;
      return(1);
   }

   CongFabricData &congData = (*cI).second;
      congData.stageWorstCase = 0;

      // start from all leaf switches and walk up then down again
      for (int rank = congData.maxRank; rank >= -congData.maxRank; rank--) {
            int numPortsInLevel = 0;
            if (!rank) going_up = 0;

            for (map_str_pnode::iterator nI = p_fabric->NodeByName.begin();
                    nI != p_fabric->NodeByName.end();
                    nI++) {
                  IBNode *p_node = (*nI).second;

                  // we treat nodes with rank equals to absolute rank index
                  if (p_node->rank != abs(rank)) continue;

                  for (unsigned int pn = 1; pn <= p_node->numPorts; pn++) {
                        IBPort *p_port = p_node->getPort(pn);

                        // do we have a port on the other side ?
                        if (!p_port || !p_port->p_remotePort) continue;

                        // if we go up ignore the port if not going up
                        if (going_up &&
                               ( (p_port->p_remotePort->p_node->type == IB_CA_NODE) ||
                                    (p_port->p_remotePort->p_node->rank >= rank) ) )
                              continue;

                        // if we go down ignore the port if not going down
                        if (!going_up &&
                               (p_port->p_remotePort->p_node->type == IB_SW_NODE) &&
                               (p_port->p_remotePort->p_node->rank <= -rank))
                              continue;

                        numPortsInLevel++;
                        map_pport_paths::iterator pI = congData.portPaths.find(p_port);

                        // now see what pairs are routed through the port
                        float sumFracs = 0;
            for (list_src_dst::iterator lI = (*pI).second.begin();
                 lI != (*pI).second.end();
                 lI++) {
                              int dst = (*lI).second;

                              // find the fraction for that destination:
                              map_int_float::iterator fI = dst_frac.find(dst);
                              if (fI == dst_frac.end()) {
                                    dst_frac[dst] = 1.0;
                                    sumFracs += 1.0;
                              } else {
                                    sumFracs += dst_frac[dst];
                              }
                        }

                        // update statistics
                        int numPaths = (int)sumFracs;
                        congData.portNumPaths[p_port] = numPaths;

                        for(unsigned int i = congData.numPathsHist.size(); i <= numPaths; i++)
                              congData.numPathsHist.push_back(0);
                        congData.numPathsHist[numPaths]++;

                        if (congData.stageWorstCase < numPaths)
                              congData.stageWorstCase = numPaths;

                        // debug:
                        if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
                        {
                              if (sumFracs > 1.0)
                              {
                                    cout << "-V- port:" << (*pI).first->getName() << " Coliding:";
                                    for (list_src_dst::iterator lI = (*pI).second.begin();
                                            lI != (*pI).second.end();
                                            lI++)
                                          cout << (*lI).first << "," << (*lI).second
                                                  << "(" << dst_frac[(*lI).second] << ") ";
                                    cout << endl;
                              }
                        }

                        // update fractions
                        if (sumFracs > 1.0) {
                              for (list_src_dst::iterator lI = (*pI).second.begin();
                                      lI != (*pI).second.end();
                                      lI++) {
                                    int dst = (*lI).second;

                                    dst_frac[dst] = dst_frac[dst] / sumFracs;
                              }
                        }

                        // clear the list for the future ...
                        (*pI).second.clear();

                  } // ports in right direction
            } // all nodes
            if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
                  cout << "-V- Scanned rank:" << rank << " ports:" << numPortsInLevel << endl;
      } // ranks

   congData.stageWorstCases.push_back(congData.stageWorstCase);

      return (0);
}

// Track a single path
int
CongTrackPath(IBFabric *p_fabric, uint16_t srcLid, uint16_t dstLid)
{
   // get the reference to the actual data structure:
   map_pfabric_cong::iterator cI = CongFabrics.find(p_fabric);
   if (cI == CongFabrics.end())
   {
      cout << "-E- Congestion Tracker not previously initialized" << endl;
      return(1);
   }

   CongFabricData &congData = (*cI).second;

   // find the source and destination ports:
   IBPort *p_srcPort = p_fabric->getPortByLid(srcLid);
   if (! p_srcPort)
   {
      cout << "-E- Fail to find port by source LID:" << srcLid << endl;
      return(1);
   }

   IBPort *p_dstPort = p_fabric->getPortByLid(dstLid);
   if (! p_dstPort)
   {
      cout << "-E- Fail to find port by destination LID:" << dstLid << endl;
      return(1);
   }

   pair< uint16_t, uint16_t> pathPair(srcLid, dstLid);

   IBNode *p_node;
   IBPort *p_port = p_srcPort;
   IBPort *p_remotePort = NULL;
   int hopCnt = 0;

   if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
   {
      cout << "-V-----------------------------------------------------" << endl;
      cout << "-V- Tracing from lid:" << srcLid << " to lid:"
           << dstLid << endl;
   }

   // if the port is not a switch - go to the next switch:
   if (p_port->p_node->type != IB_SW_NODE)
   {
      // try the next one:
      if (!p_port->p_remotePort)
      {
         cout << "-E- Provided starting point is not connected !"
              << "lid:" << srcLid << endl;
         return 1;
      }

      p_remotePort = p_port->p_remotePort;
      p_node = p_remotePort->p_node;
      hopCnt++;
      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
         cout << "-V- Arrived at Node:" << p_node->name
              << " Port:" << p_port->p_remotePort->num << endl;
   }
   else
   {
      // it is a switch :
      p_node = p_port->p_node;
   }

   // verify we are finally on a switch:
   if (p_node->type != IB_SW_NODE)
   {
      cout << "-E- Provided starting point is not connected to a switch !"
           << "lid:" << srcLid << endl;
      return 1;
   }

   // traverse:
   int done = 0;
   while (!done) {

      // we need to store this info for marking later
      list_src_dst &lst = congData.portPaths[p_port];
      lst.push_back(pathPair);
      if (lst.size() > congData.stageWorstCase)
      {
         congData.stageWorstCase = lst.size();
         if (congData.stageWorstCase > congData.worstWorstCase)
         {
            congData.worstWorstCase = congData.stageWorstCase;
            congData.p_worstPort = p_port;
            if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
            {
               cout << endl;
               cout << "-I- Found Worst Port:" << p_port->getName()
                    << " paths:" << lst.size() << endl;
               for (list_src_dst::iterator lI = lst.begin();
                    lI != lst.end(); lI++)
                  cout << "  from:" << (*lI).first << " to:"
                       << (*lI).second << endl;
            }
         }
      }

      // calc next node:
      int pn = p_node->getLFTPortForLid(dstLid);
      if (pn == IB_LFT_UNASSIGNED)
      {
         cout << "-E- Unassigned LFT for lid:" << dstLid
              << " Dead end at:" << p_node->name << endl;
         return 1;
      }

      // if the port number is 0 we must have reached the target node.
      // simply try see that p_remotePort of last step == p_dstPort
      if (pn == 0)
      {
         if (p_dstPort != p_remotePort)
         {
            cout << "-E- Dead end at port 0 of node:" << p_node->name << endl;
            return 1;
         }
         return 0;
      }

      // get the port on the other side
      p_port = p_node->getPort(pn);

      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
         cout << "-V- Going out on port:" << pn << endl;

      if (! (p_port &&
             p_port->p_remotePort &&
             p_port->p_remotePort->p_node)) {
         cout << "-E- Dead end at:" << p_node->name << endl;
         return 1;
      }

      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
         cout << "-V- Arrived at Node:" << p_port->p_remotePort->p_node->name
              << " Port:" << p_port->p_remotePort->num << endl;

      p_remotePort = p_port->p_remotePort;

      // check if we are done:
      done = (p_remotePort == p_dstPort);

      p_node = p_remotePort->p_node;
      if (hopCnt++ > 256)
      {
         cout << "-E- Aborting after 256 hops - loop in LFT?" << endl;
         return 1;
      }
   }

   congData.numPaths++;
   return(0);
}

// Report the worst usage and a histogram of link usage
int
CongReport(IBFabric *p_fabric, ostringstream &out)
{
   int worstWorstPath = 0;
   map<int, int, less<int> > stageWorstCaseHist;

   // get the reference to the actual data structure:
   map_pfabric_cong::iterator cI = CongFabrics.find(p_fabric);
   if (cI == CongFabrics.end())
   {
      cout << "-E- Congestion Tracker not previously initialized" << endl;
      return(1);
   }

   CongFabricData &congData = (*cI).second;

   // collect the histogram of stage worst paths
   for(list<int>::iterator lI = congData.stageWorstCases.begin();
       lI != congData.stageWorstCases.end();
       lI++)
   {
      stageWorstCaseHist[*lI]++;
      if (worstWorstPath < *lI) worstWorstPath = (*lI);
   }

   out << "---------------------------------------------------------------------------\n" << endl;
   out << "-I- Traced total:" << congData.numPaths << " paths" << endl;

   out << "-I- Worst link over subscrition:" << worstWorstPath
       << " port:" << congData.p_worstPort->getName() << endl;

   out << "---------------------- TOTAL CONGESTION HISTOGRAM ------------------------" << endl;
   out << "Describes distribution of oversubscription of paths per port." << endl;
   out << "NUM-PATHS NUM-OUT-PORTS" << endl;
   for (int b = 0; b < congData.numPathsHist.size() ; b++)
      if (congData.numPathsHist[b])
         out << setw(4) << b << "   " << congData.numPathsHist[b] << endl;
   out << "---------------------------------------------------------------------------\n" << endl;

   out << "---------------------- STAGE CONGESTION HISTOGRAM ------------------------" << endl;
   out << "Describes distribution of worst oversubscription of paths per stage." << endl;
   out << "WORST-CONG NUM-STAGES" << endl;
   for (map<int, int, less<int> >::iterator bI = stageWorstCaseHist.begin();
        bI != stageWorstCaseHist.end();
        bI++)
      out << setw(4) << (*bI).first << "   " << (*bI).second << endl;
   out << "---------------------------------------------------------------------------\n" << endl;
   return(0);
}

// Dump out all link usages and details if available into
// the given ostream
int
CongDump(IBFabric *p_fabric, ostringstream &out)
{
   // get the reference to the actual data structure:
   map_pfabric_cong::iterator cI = CongFabrics.find(p_fabric);
   if (cI == CongFabrics.end())
   {
      cout << "-E- Congestion Tracker not previously initialized" << endl;
      return(1);
   }

   CongFabricData &congData = (*cI).second;

   // go over all ports and dump out their paths...
   for (map_pport_paths::iterator pI = congData.portPaths.begin();
        pI != congData.portPaths.end();
        pI++)
   {
      out << "PORT:" << (*pI).first->getName()
                   << " NUM:" << congData.portNumPaths[(*pI).first] << endl;
      for ( list_src_dst::iterator lI = (*pI).second.begin();
            lI != (*pI).second.end();
            lI++)
         out << (*lI).first << " " << (*lI).second << endl;
   }
   return(0);
}


Generated by  Doxygen 1.6.0   Back to index