distance.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. package gago
  2. import (
  3. "fmt"
  4. "math"
  5. )
  6. // A Metric returns the distance between two genomes.
  7. type Metric func(a, b Individual) float64
  8. // A DistanceMemoizer computes and stores Metric calculations.
  9. type DistanceMemoizer struct {
  10. Metric Metric
  11. Distances map[string]map[string]float64
  12. nCalculations int // Total number of calls to Metric for testing purposes
  13. }
  14. // newDistanceMemoizer initializes a DistanceMemoizer.
  15. func newDistanceMemoizer(metric Metric) DistanceMemoizer {
  16. return DistanceMemoizer{
  17. Metric: metric,
  18. Distances: make(map[string]map[string]float64),
  19. }
  20. }
  21. // GetDistance returns the distance between two Individuals based on the
  22. // DistanceMemoizer's Metric field. If the two individuals share the same ID
  23. // then GetDistance returns 0. DistanceMemoizer stores the calculated distances
  24. // so that if GetDistance is called twice with the two same Individuals then
  25. // the second call will return the stored distance instead of recomputing it.
  26. func (dm *DistanceMemoizer) GetDistance(a, b Individual) float64 {
  27. // Check if the two individuals are the same before proceding
  28. if a.ID == b.ID {
  29. return 0
  30. }
  31. // Create maps if the genomes have never been encountered
  32. if _, ok := dm.Distances[a.ID]; !ok {
  33. dm.Distances[a.ID] = make(map[string]float64)
  34. } else {
  35. // Check if the distance between the two genomes has been calculated
  36. if dist, ok := dm.Distances[a.ID][b.ID]; ok {
  37. return dist
  38. }
  39. }
  40. if _, ok := dm.Distances[b.ID]; !ok {
  41. dm.Distances[b.ID] = make(map[string]float64)
  42. }
  43. // Calculate the distance between the genomes and memoize it
  44. var dist = dm.Metric(a, b)
  45. dm.Distances[a.ID][b.ID] = dist
  46. dm.Distances[b.ID][a.ID] = dist
  47. dm.nCalculations++
  48. return dist
  49. }
  50. // calcAvgDistances returns a map that associates the ID of each provided
  51. // Individual with the average distance the Individual has with the rest of the
  52. // Individuals.
  53. func calcAvgDistances(indis Individuals, dm DistanceMemoizer) map[string]float64 {
  54. var avgDistances = make(map[string]float64)
  55. for _, a := range indis {
  56. for _, b := range indis {
  57. avgDistances[a.ID] += dm.GetDistance(a, b)
  58. }
  59. avgDistances[a.ID] /= float64(len(indis) - 1)
  60. }
  61. return avgDistances
  62. }
  63. func rebalanceClusters(clusters []Individuals, dm DistanceMemoizer, minPerCluster int) error {
  64. // Calculate the number of missing Individuals per cluster for each cluster
  65. // to reach at least minPerCluster Individuals.
  66. var missing = make([]int, len(clusters))
  67. for i, cluster := range clusters {
  68. // Check that the cluster has at least one Individual
  69. if len(cluster) == 0 {
  70. return fmt.Errorf("Cluster %d has 0 individuals", i)
  71. }
  72. // Calculate the number of missing Individual in the cluster to reach minPerCluster
  73. missing[i] = minPerCluster - len(cluster)
  74. }
  75. // Check if there are enough Individuals to rebalance the clusters.
  76. if sumInts(missing) >= 0 {
  77. return fmt.Errorf("Missing %d individuals to be able to rebalance the clusters",
  78. sumInts(missing))
  79. }
  80. // Loop through the clusters that are missing Individuals
  81. for i, cluster := range clusters {
  82. // Check if the cluster is missing Individuals
  83. if missing[i] <= 0 {
  84. continue
  85. }
  86. // Assign new Individuals to the cluster while it is missing some
  87. for missing[i] > 0 {
  88. // Determine the medoid
  89. cluster.SortByDistanceToMedoid(dm)
  90. var medoid = cluster[0]
  91. // Go through the Individuals of the other clusters and find the one
  92. // closest to the computed medoid
  93. var (
  94. cci int // Closest cluster index
  95. cii int // Closest Individual index
  96. minDist = math.Inf(1)
  97. )
  98. for j := range clusters {
  99. // Check that the cluster has Individuals to spare
  100. if i == j || missing[j] >= 0 {
  101. continue
  102. }
  103. // Find the closest Individual to the medoid inside the cluster
  104. for k, indi := range clusters[j] {
  105. var dist = dm.GetDistance(indi, medoid)
  106. if dist < minDist {
  107. cci = j
  108. cii = k
  109. minDist = dist
  110. }
  111. }
  112. }
  113. // Add the closest Individual to the cluster
  114. clusters[i] = append(clusters[i], clusters[cci][cii])
  115. // Remove the closest Individual from the cluster it belonged to
  116. clusters[cci] = append(clusters[cci][:cii], clusters[cci][cii+1:]...)
  117. missing[i]--
  118. }
  119. }
  120. return nil
  121. }