diff --git a/README.md b/README.md index b96b6e2..8f124d5 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ # How-to-Generate-Music-Demo This is the code for "How to Generate Music - Intro to Deep Learning #9' by Siraj Raval on YouTube -##Overview +## Overview This is the code for [this]() video on Youtube by Siraj Raval as part of the the Udacity Deep Learning Nanodegree. It uses Keras & Theano, two deep learning libraries, to generate jazz music. Specifically, it builds a two-layer LSTM, learning from the given MIDI file. -##Dependencies +## Dependencies * [Keras](http://keras.io/#installation) * [Theano](http://deeplearning.net/software/theano/install.html#bleeding-edge-install-instructions) ("bleeding-edge" version on GitHub) * [music21](http://web.mit.edu/music21/doc/installing/index.html) -##Usage +## Usage Run on CPU with command: ``` @@ -31,6 +31,6 @@ Note: `preprocess.py` must be modified to work with other MIDI files (the releva The challenge is to generate your own MIDI file! This code trains off of a single MIDI file and the preprocess.py file manually selects the relevant melody part. Modify it so that it selects the melody from your own MIDI file. Bonus points if you train it on not one, but multiple MIDI files. Through training and testing this code, you'll witness just how powerful LSTM networks are and further understand the generative process. Good luck! -##Credits +## Credits The credits for this code go to [Ji Sung Kim](https://github.com/jisungk/deepjazz). I've merely created a wrapper to get people started. diff --git a/generator.py b/generator.py index f2c0369..31087b5 100644 --- a/generator.py +++ b/generator.py @@ -26,33 +26,44 @@ from qa import * import lstm -#----------------------------HELPER FUNCTIONS----------------------------------# +# ----------------------------HELPER FUNCTIONS----------------------------------# ''' Helper function to sample an index from a probability array ''' + + def __sample(a, temperature=1.0): a = np.log(a) / temperature a = np.exp(a) / np.sum(np.exp(a)) - return np.argmax(np.random.multinomial(1, a, 1)) + a = np.log(a) / temperature + dist = np.exp(a) / np.sum(np.exp(a)) + choices = range(len(a)) + return np.random.choice(choices, p=dist) + # return np.argmax(np.random.multinomial(1, a, 1)) + ''' Helper function to generate a predicted value from a given matrix ''' + + def __predict(model, x, indices_val, diversity): preds = model.predict(x, verbose=0)[0] next_index = __sample(preds, diversity) next_val = indices_val[next_index] - return next_val + ''' Helper function which uses the given model to generate a grammar sequence from a given corpus, indices_val (mapping), abstract_grammars (list), and diversity floating point value. ''' + + def __generate_grammar(model, corpus, abstract_grammars, values, val_indices, indices_val, max_len, max_tries, diversity): curr_grammar = '' # np.random.randint is exclusive to high start_index = np.random.randint(0, len(corpus) - max_len) - sentence = corpus[start_index: start_index + max_len] # seed + sentence = corpus[start_index: start_index + max_len] # seed running_length = 0.0 - while running_length <= 4.1: # arbitrary, from avg in input file + while running_length <= 4.1: # arbitrary, from avg in input file # transform sentence (previous sequence) to matrix x = np.zeros((1, max_len, len(values))) for t, val in enumerate(sentence): @@ -64,12 +75,12 @@ def __generate_grammar(model, corpus, abstract_grammars, values, val_indices, # fix first note: must not have < > and not be a rest if (running_length < 0.00001): tries = 0 - while (next_val.split(',')[0] == 'R' or - len(next_val.split(',')) != 2): + while (next_val.split(',')[0] == 'R' or + len(next_val.split(',')) != 2): # give up after 1000 tries; random from input's first notes if tries >= max_tries: - print('Gave up on first note generation after', max_tries, - 'tries') + print('Gave up on first note generation after', max_tries, + 'tries') # np.random is exclusive to high rand = np.random.randint(0, len(abstract_grammars)) next_val = abstract_grammars[rand].split(' ')[0] @@ -79,7 +90,7 @@ def __generate_grammar(model, corpus, abstract_grammars, values, val_indices, tries += 1 # shift sentence over with new value - sentence = sentence[1:] + sentence = sentence[1:] sentence.append(next_val) # except for first case, add a ' ' separator @@ -91,9 +102,12 @@ def __generate_grammar(model, corpus, abstract_grammars, values, val_indices, return curr_grammar -#----------------------------PUBLIC FUNCTIONS----------------------------------# + +# ----------------------------PUBLIC FUNCTIONS----------------------------------# ''' Generates musical sequence based on the given data filename and settings. Plays then stores (MIDI file) the generated output. ''' + + def generate(data_fn, out_fn, N_epochs): # model settings max_len = 20 @@ -110,7 +124,7 @@ def generate(data_fn, out_fn, N_epochs): print('total # of values:', len(values)) # build model - model = lstm.build_model(corpus=corpus, val_indices=val_indices, + model = lstm.build_model(corpus=corpus, val_indices=val_indices, max_len=max_len, N_epochs=N_epochs) # set up audio stream @@ -126,14 +140,14 @@ def generate(data_fn, out_fn, N_epochs): curr_chords.insert((j.offset % 4), j) # generate grammar - curr_grammar = __generate_grammar(model=model, corpus=corpus, - abstract_grammars=abstract_grammars, - values=values, val_indices=val_indices, - indices_val=indices_val, + curr_grammar = __generate_grammar(model=model, corpus=corpus, + abstract_grammars=abstract_grammars, + values=values, val_indices=val_indices, + indices_val=indices_val, max_len=max_len, max_tries=max_tries, diversity=diversity) - curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C') + curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C') # Pruning #1: smoothing measure curr_grammar = prune_grammar(curr_grammar) @@ -149,7 +163,7 @@ def generate(data_fn, out_fn, N_epochs): # print # of notes in curr_notes print('After pruning: %s notes' % (len([i for i in curr_notes - if isinstance(i, note.Note)]))) + if isinstance(i, note.Note)]))) # insert into the output stream for m in curr_notes: @@ -171,23 +185,30 @@ def generate(data_fn, out_fn, N_epochs): mf.write() mf.close() + ''' Runs generate() -- generating, playing, then storing a musical sequence -- with the default Metheny file. ''' + + def main(args): try: N_epochs = int(args[1]) except: - N_epochs = 128 # default + N_epochs = 128 # default # i/o settings - data_fn = 'midi/' + 'original_metheny.mid' # 'And Then I Knew' by Pat Metheny + data_fn = 'midi/' + 'original_metheny.mid' # 'And Then I Knew' by Pat Metheny out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs) - if (N_epochs == 1): out_fn += '_epoch.midi' - else: out_fn += '_epochs.midi' + if (N_epochs == 1): + out_fn += '_epoch.midi' + else: + out_fn += '_epochs.midi' generate(data_fn, out_fn, N_epochs) + ''' If run as script, execute main ''' if __name__ == '__main__': import sys - main(sys.argv) \ No newline at end of file + + main(sys.argv) diff --git a/grammar.py b/grammar.py index e1f2889..999800c 100644 --- a/grammar.py +++ b/grammar.py @@ -13,60 +13,74 @@ import copy, random, pdb ''' Helper function to determine if a note is a scale tone. ''' + + def __is_scale_tone(chord, note): # Method: generate all scales that have the chord notes th check if note is # in names # Derive major or minor scales (minor if 'other') based on the quality # of the chord. - scaleType = scale.DorianScale() # i.e. minor pentatonic + scaleType = scale.DorianScale() # i.e. minor pentatonic if chord.quality == 'major': scaleType = scale.MajorScale() # Can change later to deriveAll() for flexibility. If so then use list # comprehension of form [x for a in b for x in a]. - scales = scaleType.derive(chord) # use deriveAll() later for flexibility + scales = scaleType.derive(chord) # use deriveAll() later for flexibility allPitches = list(set([pitch for pitch in scales.getPitches()])) - allNoteNames = [i.name for i in allPitches] # octaves don't matter + allNoteNames = [i.name for i in allPitches] # octaves don't matter # Get note name. Return true if in the list of note names. noteName = note.name return (noteName in allNoteNames) + ''' Helper function to determine if a note is an approach tone. ''' + + def __is_approach_tone(chord, note): # Method: see if note is +/- 1 a chord tone. for chordPitch in chord.pitches: stepUp = chordPitch.transpose(1) stepDown = chordPitch.transpose(-1) - if (note.name == stepDown.name or - note.name == stepDown.getEnharmonic().name or - note.name == stepUp.name or - note.name == stepUp.getEnharmonic().name): - return True + if (note.name == stepDown.name or + note.name == stepDown.getEnharmonic().name or + note.name == stepUp.name or + note.name == stepUp.getEnharmonic().name): + return True return False + ''' Helper function to determine if a note is a chord tone. ''' + + def __is_chord_tone(lastChord, note): return (note.name in (p.name for p in lastChord.pitches)) + ''' Helper function to generate a chord tone. ''' + + def __generate_chord_tone(lastChord): lastChordNoteNames = [p.nameWithOctave for p in lastChord.pitches] return note.Note(random.choice(lastChordNoteNames)) + ''' Helper function to generate a scale tone. ''' + + def __generate_scale_tone(lastChord): # Derive major or minor scales (minor if 'other') based on the quality # of the lastChord. - scaleType = scale.WeightedHexatonicBlues() # minor pentatonic + scaleType = scale.WeightedHexatonicBlues() # minor pentatonic if lastChord.quality == 'major': scaleType = scale.MajorScale() # Can change later to deriveAll() for flexibility. If so then use list # comprehension of form [x for a in b for x in a]. - scales = scaleType.derive(lastChord) # use deriveAll() later for flexibility + scales = scaleType.derive(lastChord) # use deriveAll() later for flexibility allPitches = list(set([pitch for pitch in scales.getPitches()])) - allNoteNames = [i.name for i in allPitches] # octaves don't matter + allNoteNames = [i.name for i in allPitches] # octaves don't matter # Return a note (no octave here) in a scale that matches the lastChord. sNoteName = random.choice(allNoteNames) @@ -75,15 +89,21 @@ def __generate_scale_tone(lastChord): sNote = note.Note(("%s%s" % (sNoteName, sNoteOctave))) return sNote + ''' Helper function to generate an approach tone. ''' + + def __generate_approach_tone(lastChord): sNote = __generate_scale_tone(lastChord) aNote = sNote.transpose(random.choice([1, -1])) return aNote + ''' Helper function to generate a random tone. ''' + + def __generate_arbitrary_tone(lastChord): - return __generate_scale_tone(lastChord) # fix later, make random note. + return __generate_scale_tone(lastChord) # fix later, make random note. ''' Given the notes in a measure ('measure') and the chords in that measure @@ -122,6 +142,8 @@ def __generate_arbitrary_tone(lastChord): "C,0.125" : chord note of (1/32) length, generated anywhere from minor 6th down to major 2nd down. (interval is not ordered). ''' + + def parse_melody(fullMeasureNotes, fullMeasureChords): # Remove extraneous elements.x measure = copy.deepcopy(fullMeasureNotes) @@ -133,18 +155,18 @@ def parse_melody(fullMeasureNotes, fullMeasureChords): # 1) measureStartTime: the offset for measure's start, e.g. 476.0. # 2) measureStartOffset: how long from the measure start to the first element. measureStartTime = measure[0].offset - (measure[0].offset % 4) - measureStartOffset = measure[0].offset - measureStartTime + measureStartOffset = measure[0].offset - measureStartTime # Iterate over the notes and rests in measure, finding the grammar for each # note in the measure and adding an abstract grammatical string for it. fullGrammar = "" - prevNote = None # Store previous note. Need for interval. - numNonRests = 0 # Number of non-rest elements. Need for updating prevNote. + prevNote = None # Store previous note. Need for interval. + numNonRests = 0 # Number of non-rest elements. Need for updating prevNote. for ix, nr in enumerate(measure): # Get the last chord. If no last chord, then (assuming chords is of length # >0) shift first chord in chords to the beginning of the measure. - try: + try: lastChord = [n for n in chords if n.offset <= nr.offset][-1] except IndexError: chords[0].offset = measureStartTime @@ -172,14 +194,14 @@ def parse_melody(fullMeasureNotes, fullMeasureChords): # SECOND, get the length for each element. e.g. 8th note = R8, but # to simplify things you'll use the direct num, e.g. R,0.125 - if (ix == (len(measure)-1)): + if (ix == (len(measure) - 1)): # formula for a in "a - b": start of measure (e.g. 476) + 4 diff = measureStartTime + 4.0 - nr.offset else: diff = measure[ix + 1].offset - nr.offset # Combine into the note info. - noteInfo = "%s,%.3f" % (elementType, nr.quarterLength) # back to diff + noteInfo = "%s,%.3f" % (elementType, nr.quarterLength) # back to diff # THIRD, get the deltas (max range up, max range down) based on where # the previous note was, +- minor 3. Skip rests (don't affect deltas). @@ -192,8 +214,8 @@ def parse_melody(fullMeasureNotes, fullMeasureChords): noteDist = interval.Interval(noteStart=prevNote, noteEnd=nr) noteDistUpper = interval.add([noteDist, "m3"]) noteDistLower = interval.subtract([noteDist, "m3"]) - intervalInfo = ",<%s,%s>" % (noteDistUpper.directedName, - noteDistLower.directedName) + intervalInfo = ",<%s,%s>" % (noteDistUpper.directedName, + noteDistLower.directedName) # print "Upper, lower: %s, %s" % (noteDistUpper, # noteDistLower) # print "Upper, lower dnames: %s, %s" % ( @@ -203,28 +225,31 @@ def parse_melody(fullMeasureNotes, fullMeasureChords): prevNote = nr # Return. Do lazy evaluation for real-time performance. - grammarTerm = noteInfo + intervalInfo + grammarTerm = noteInfo + intervalInfo fullGrammar += (grammarTerm + " ") return fullGrammar.rstrip() + ''' Given a grammar string and chords for a measure, returns measure notes. ''' + + def unparse_grammar(m1_grammar, m1_chords): m1_elements = stream.Voice() - currOffset = 0.0 # for recalculate last chord. + currOffset = 0.0 # for recalculate last chord. prevElement = None for ix, grammarElement in enumerate(m1_grammar.split(' ')): terms = grammarElement.split(',') - currOffset += float(terms[1]) # works just fine + currOffset += float(terms[1]) # works just fine # Case 1: it's a rest. Just append if terms[0] == 'R': - rNote = note.Rest(quarterLength = float(terms[1])) + rNote = note.Rest(quarterLength=float(terms[1])) m1_elements.insert(currOffset, rNote) continue # Get the last chord first so you can find chord note, scale note, etc. - try: + try: lastChord = [n for n in m1_chords if n.offset <= currOffset][-1] except IndexError: m1_chords[0].offset = 0.0 @@ -236,8 +261,8 @@ def unparse_grammar(m1_grammar, m1_chords): # Case #1: if no < > to indicate next note range. Usually this lack of < > # is for the first note (no precedent), or for rests. - if (len(terms) == 2): # Case 1: if no < >. - insertNote = note.Note() # default is C + if (len(terms) == 2): # Case 1: if no < >. + insertNote = note.Note() # default is C # Case C: chord note. if terms[0] == 'C': @@ -262,34 +287,34 @@ def unparse_grammar(m1_grammar, m1_chords): # Case #2: if < > for the increment. Usually for notes after the first one. else: # Get lower, upper intervals and notes. - interval1 = interval.Interval(terms[2].replace("<",'')) - interval2 = interval.Interval(terms[3].replace(">",'')) + interval1 = interval.Interval(terms[2].replace("<", '')) + interval2 = interval.Interval(terms[3].replace(">", '')) if interval1.cents > interval2.cents: upperInterval, lowerInterval = interval1, interval2 else: upperInterval, lowerInterval = interval2, interval1 lowPitch = interval.transposePitch(prevElement.pitch, lowerInterval) highPitch = interval.transposePitch(prevElement.pitch, upperInterval) - numNotes = int(highPitch.ps - lowPitch.ps + 1) # for range(s, e) + numNotes = int(highPitch.ps - lowPitch.ps + 1) # for range(s, e) # Case C: chord note, must be within increment (terms[2]). # First, transpose note with lowerInterval to get note that is # the lower bound. Then iterate over, and find valid notes. Then # choose randomly from those. - + if terms[0] == 'C': relevantChordTones = [] - for i in xrange(0, numNotes): + for i in range(0, numNotes): currNote = note.Note(lowPitch.transpose(i).simplifyEnharmonic()) if __is_chord_tone(lastChord, currNote): relevantChordTones.append(currNote) if len(relevantChordTones) > 1: insertNote = random.choice([i for i in relevantChordTones - if i.nameWithOctave != prevElement.nameWithOctave]) + if i.nameWithOctave != prevElement.nameWithOctave]) elif len(relevantChordTones) == 1: insertNote = relevantChordTones[0] - else: # if no choices, set to prev element +-1 whole step - insertNote = prevElement.transpose(random.choice([-2,2])) + else: # if no choices, set to prev element +-1 whole step + insertNote = prevElement.transpose(random.choice([-2, 2])) if insertNote.octave < 3: insertNote.octave = 3 insertNote.quarterLength = float(terms[1]) @@ -298,17 +323,17 @@ def unparse_grammar(m1_grammar, m1_chords): # Case S: scale note, must be within increment. elif terms[0] == 'S': relevantScaleTones = [] - for i in xrange(0, numNotes): + for i in range(0, numNotes): currNote = note.Note(lowPitch.transpose(i).simplifyEnharmonic()) if __is_scale_tone(lastChord, currNote): relevantScaleTones.append(currNote) if len(relevantScaleTones) > 1: insertNote = random.choice([i for i in relevantScaleTones - if i.nameWithOctave != prevElement.nameWithOctave]) + if i.nameWithOctave != prevElement.nameWithOctave]) elif len(relevantScaleTones) == 1: insertNote = relevantScaleTones[0] - else: # if no choices, set to prev element +-1 whole step - insertNote = prevElement.transpose(random.choice([-2,2])) + else: # if no choices, set to prev element +-1 whole step + insertNote = prevElement.transpose(random.choice([-2, 2])) if insertNote.octave < 3: insertNote.octave = 3 insertNote.quarterLength = float(terms[1]) @@ -318,17 +343,17 @@ def unparse_grammar(m1_grammar, m1_chords): # For now: handle both A and X cases. else: relevantApproachTones = [] - for i in xrange(0, numNotes): + for i in range(0, numNotes): currNote = note.Note(lowPitch.transpose(i).simplifyEnharmonic()) if __is_approach_tone(lastChord, currNote): relevantApproachTones.append(currNote) if len(relevantApproachTones) > 1: insertNote = random.choice([i for i in relevantApproachTones - if i.nameWithOctave != prevElement.nameWithOctave]) + if i.nameWithOctave != prevElement.nameWithOctave]) elif len(relevantApproachTones) == 1: insertNote = relevantApproachTones[0] - else: # if no choices, set to prev element +-1 whole step - insertNote = prevElement.transpose(random.choice([-2,2])) + else: # if no choices, set to prev element +-1 whole step + insertNote = prevElement.transpose(random.choice([-2, 2])) if insertNote.octave < 3: insertNote.octave = 3 insertNote.quarterLength = float(terms[1]) @@ -337,4 +362,4 @@ def unparse_grammar(m1_grammar, m1_chords): # update the previous element. prevElement = insertNote - return m1_elements \ No newline at end of file + return m1_elements diff --git a/lstm.py b/lstm.py index 159cb68..fd1c01d 100644 --- a/lstm.py +++ b/lstm.py @@ -16,6 +16,8 @@ import numpy as np ''' Build a 2-layer LSTM from a training corpus ''' + + def build_model(corpus, val_indices, max_len, N_epochs=128): # number of different values or words in corpus N_values = len(set(corpus)) @@ -50,4 +52,4 @@ def build_model(corpus, val_indices, max_len, N_epochs=128): model.fit(X, y, batch_size=128, nb_epoch=N_epochs) - return model \ No newline at end of file + return model diff --git a/preprocess.py b/preprocess.py index 3d1c683..4ffdf61 100644 --- a/preprocess.py +++ b/preprocess.py @@ -11,7 +11,8 @@ from music21 import * from collections import defaultdict, OrderedDict -from itertools import groupby, izip_longest +from itertools import groupby +from itertools import zip_longest as izip_longest from grammar import * #----------------------------HELPER FUNCTIONS----------------------------------# @@ -34,7 +35,7 @@ def __parse_midi(data_fn): # Change key signature to adhere to comp_stream (1 sharp, mode = major). # Also add Electric Guitar. melody_voice.insert(0, instrument.ElectricGuitar()) - melody_voice.insert(0, key.KeySignature(sharps=1, mode='major')) + melody_voice.insert(0, key.KeySignature(sharps=1)) # The accompaniment parts. Take only the best subset of parts from # the original data. Maybe add more parts, hand-add valid instruments. @@ -48,7 +49,7 @@ def __parse_midi(data_fn): # Full stream containing both the melody and the accompaniment. # All parts are flattened. full_stream = stream.Voice() - for i in xrange(len(comp_stream)): + for i in range(len(comp_stream)): full_stream.append(comp_stream[i]) full_stream.append(melody_voice) @@ -111,7 +112,7 @@ def __parse_midi(data_fn): def __get_abstract_grammars(measures, chords): # extract grammars abstract_grammars = [] - for ix in xrange(1, len(measures)): + for ix in range(1, len(measures)): m = stream.Voice() for i in measures[ix]: m.insert(i.offset, i) diff --git a/qa.py b/qa.py index 4704cb3..8a9075c 100644 --- a/qa.py +++ b/qa.py @@ -6,55 +6,72 @@ Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml with express permission. ''' -from itertools import izip_longest +from itertools import zip_longest as izip_longest import random from music21 import * -#----------------------------HELPER FUNCTIONS----------------------------------# +# ----------------------------HELPER FUNCTIONS----------------------------------# ''' Helper function to down num to the nearest multiple of mult. ''' + + def __roundDown(num, mult): return (float(num) - (float(num) % mult)) + ''' Helper function to round up num to nearest multiple of mult. ''' + + def __roundUp(num, mult): return __roundDown(num, mult) + mult + ''' Helper function that, based on if upDown < 0 or upDown >= 0, rounds number down or up respectively to nearest multiple of mult. ''' + + def __roundUpDown(num, mult, upDown): if upDown < 0: return __roundDown(num, mult) else: return __roundUp(num, mult) + ''' Helper function, from recipes, to iterate over list in chunks of n length. ''' + + def __grouper(iterable, n, fillvalue=None): args = [iter(iterable)] * n return izip_longest(*args, fillvalue=fillvalue) -#----------------------------PUBLIC FUNCTIONS----------------------------------# + +# ----------------------------PUBLIC FUNCTIONS----------------------------------# ''' Smooth the measure, ensuring that everything is in standard note lengths (e.g., 0.125, 0.250, 0.333 ... ). ''' + + def prune_grammar(curr_grammar): pruned_grammar = curr_grammar.split(' ') for ix, gram in enumerate(pruned_grammar): terms = gram.split(',') - terms[1] = str(__roundUpDown(float(terms[1]), 0.250, - random.choice([-1, 1]))) + terms[1] = str(__roundUpDown(float(terms[1]), 0.250, + random.choice([-1, 1]))) pruned_grammar[ix] = ','.join(terms) pruned_grammar = ' '.join(pruned_grammar) return pruned_grammar + ''' Remove repeated notes, and notes that are too close together. ''' + + def prune_notes(curr_notes): for n1, n2 in __grouper(curr_notes, n=2): - if n2 == None: # corner case: odd-length list + if n2 == None: # corner case: odd-length list continue if isinstance(n1, note.Note) and isinstance(n2, note.Note): if n1.nameWithOctave == n2.nameWithOctave: @@ -62,7 +79,10 @@ def prune_notes(curr_notes): return curr_notes + ''' Perform quality assurance on notes ''' + + def clean_up_notes(curr_notes): removeIxs = [] for ix, m in enumerate(curr_notes): @@ -73,8 +93,8 @@ def clean_up_notes(curr_notes): # Sorted, so same offset would be consecutive notes. if (ix < (len(curr_notes) - 1)): if (m.offset == curr_notes[ix + 1].offset and - isinstance(curr_notes[ix + 1], note.Note)): + isinstance(curr_notes[ix + 1], note.Note)): removeIxs.append((ix + 1)) curr_notes = [i for ix, i in enumerate(curr_notes) if ix not in removeIxs] - return curr_notes \ No newline at end of file + return curr_notes