// Origin: flucoma-core/include/flucoma/algorithms/public/NMFCross.hpp at 63d4b157432a8f33a3e051eccda54be63df6e8f0 (tremblap/flucoma-core, GitHub)
/*
Part of the Fluid Corpus Manipulation Project (http://www.flucoma.org/)
Copyright University of Huddersfield.
Licensed under the BSD-3 License.
See license.md file in the project root for full license information.
This project has received funding from the European Research Council (ERC)
under the European Union’s Horizon 2020 research and innovation programme
(grant agreement No 725899).
*/

// Jonathan Driedger, Thomas Prätzlich, and Meinard Müller
// Let It Bee — Towards NMF-Inspired Audio Mosaicing
// Proceedings of ISMIR 2015

#pragma once

#include "STFT.hpp"
#include "../util/EigenRandom.hpp"
#include "../util/FluidEigenMappings.hpp"
#include "../../data/FluidIndex.hpp"
#include "../../data/TensorTypes.hpp"
#include <Eigen/Core>
#include <Eigen/Dense>
#include <algorithm>
#include <functional>
#include <iostream>
#include <limits>
#include <numeric>
#include <utility>
#include <vector>

namespace fluid {
namespace algorithm {

// Fluid <-> Eigen view conversion helpers used by NMFCross below.
using _impl::asEigen;
using _impl::asFluid;
// Eigen types referred to unqualified throughout this header.
using Eigen::Array;
using Eigen::ArrayXd;
using Eigen::ArrayXXd;
using Eigen::Matrix;
using Eigen::MatrixXd;
using Eigen::VectorXd;

class NMFCross
{

public:
  // pass iteration number; returns true if able to continue (i.e. not
  // cancelled)
  using ProgressCallback = std::function<bool(index)>;

  NMFCross(index nIterations) : mIterations(nIterations) {}

  static void synthesize(const RealMatrixView h, const ComplexMatrixView w,
                         ComplexMatrixView out)
  {
    using namespace Eigen;
    using namespace _impl;
    MatrixXd  H = asEigen<Matrix>(h);
    MatrixXcd W = asEigen<Matrix>(w);
    MatrixXcd V = H * W;
    out <<= asFluid(V);
  }

  void process(const RealMatrixView X, RealMatrixView H1, RealMatrixView W0,
               index r, index p, index c, index randomSeed = -1) const
  {
    index nFrames = X.extent(0);
    index nBins = X.extent(1);
    index rank = W0.extent(0);
    nBins = W0.extent(1);
    MatrixXd W = asEigen<Matrix>(W0).transpose();
    MatrixXd H;
    H = EigenRandom<MatrixXd>(rank, nFrames, RandomSeed{randomSeed},
                Range{0.0, 1.0});
    MatrixXd V = asEigen<Matrix>(X).transpose();
    multiplicativeUpdates(V, W, H, r, p, c);
    MatrixXd HT = H.transpose();
    H1 <<= asFluid(HT);
  }

  void addProgressCallback(ProgressCallback&& callback)
  {
    mCallbacks.emplace_back(std::move(callback));
  }

private:
  index                         mIterations;
  std::vector<ProgressCallback> mCallbacks;

  std::vector<index> topC(Eigen::VectorXd vec, index c) const
  {
    using namespace std;
    vector<double> stdVec(vec.data(), vec.data() + vec.size());
    sort(stdVec.begin(), stdVec.end());
    vector<index> idx(asUnsigned(vec.size()));
    iota(idx.begin(), idx.end(), 0);
    sort(idx.begin(), idx.end(),
         [&vec](index i1, index i2) { return vec[i1] > vec[i2]; });
    auto result = std::vector<index>(idx.begin(), idx.begin() + c);
    return result;
  }

  Eigen::MatrixXd promoteContinuity(MatrixXd& H, index size) const
  {
    index    halfSize = (size - 1) / 2;
    MatrixXd kernel = MatrixXd::Identity(size, size);
    MatrixXd padded = MatrixXd::Zero(H.rows() + size, H.cols() + size);
    MatrixXd output = MatrixXd::Zero(H.rows(), H.cols());
    padded.block(halfSize, halfSize, H.rows(), H.cols()) = H;
    for (index i = 0; i < H.rows(); i++)
    {
      for (index j = 0; j < H.cols(); j++)
      {
        output(i, j) =
            padded.block(i, j, size, size).cwiseProduct(kernel).sum();
      }
    }
    return output;
  }

  Eigen::MatrixXd enforceTemporalSparseness(MatrixXd& H, index size,
                                            index iteration) const
  {
    index    halfSize = (size - 1) / 2;
    MatrixXd padded = MatrixXd::Zero(H.rows(), H.cols() + size);
    MatrixXd output = MatrixXd::Zero(H.rows(), H.cols());
    padded.block(0, halfSize, H.rows(), H.cols()) = H;
    for (index i = 0; i < H.rows(); i++)
    {
      for (index j = 0; j < H.cols(); j++)
      {
        VectorXd        neighborhood = padded.row(i).segment(j, size);
        VectorXd::Index maxIndex{0};
        neighborhood.maxCoeff(&maxIndex);
        if (int(maxIndex) != halfSize)
        { output(i, j) = H(i, j) * (1 - ((iteration + 1) / mIterations)); }
        else
        {
          output(i, j) = H(i, j);
        }
      }
    }
    return output;
  }


  Eigen::MatrixXd restrictPolyphony(MatrixXd& H, ArrayXd& energyInW, index size,
                                    index iteration) const
  {
    MatrixXd output = MatrixXd::Zero(H.rows(), H.cols());
    for (index k = 0; k < H.cols(); k++)
    {
      ArrayXd wCol = H.col(k).array() * energyInW.array();
      output.col(k) = H.col(k) * (1 - ((iteration + 1) / mIterations));
      auto top = topC(wCol, size);
      for (auto t : top) { output(t, k) = H(t, k); }
    }
    return output;
  }
  void multiplicativeUpdates(MatrixXd& V, MatrixXd& W, MatrixXd& H, index r,
                             index p, index c) const
  {
    using namespace std;
    using namespace Eigen;
    double const epsilon = std::numeric_limits<double>::epsilon();
    MatrixXd     ones = MatrixXd::Ones(V.rows(), V.cols());
    W = W.array().max(epsilon).matrix();
    // ArrayXd wNorm = W.colwise().sum();
    // W.array().rowwise() /= wNorm.transpose());
    ArrayXd energyInW = W.array().square().colwise().sum();
    for (index i = 0; i < mIterations; i++)
    {
      if ((i % 1) == 0)
      { // TODO: original version seems to work better with one in 5 iterations
        H = enforceTemporalSparseness(H, r, i);
        H = restrictPolyphony(H, energyInW, p, i);
        H = promoteContinuity(H, c);
      }
      ArrayXXd V2 = (W * H).array().max(epsilon);
      ArrayXXd hnum = (W.transpose() * (V.array() / V2).matrix()).array();
      ArrayXXd hden = (W.transpose() * ones).array();
      H = (H.array() * hnum / hden.max(epsilon)).matrix();
      // MatrixXd R = W * H;
      // R = R.cwiseMax(epsilon);
      // double divergence = (V.cwiseProduct(V.cwiseQuotient(R)) - V + R).sum();
      for (auto& cb : mCallbacks)
        if (!cb(i + 1)) return;
    }
    V = W * H;
  }
};
} // namespace algorithm
} // namespace fluid