1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
//! Parse 2i programs.
//!
//! This module contains functions for parsing 2i programs.

use std::io::BufReader;
use std::io::prelude::*;

use regex::Regex;

use super::{Error, Result};
use super::instruction::Instruction;

/// Read a 2i program in its textual form and return the full 32-entry
/// instruction memory.
///
/// Everything from a `#` char to the end of the line is a comment, and lines
/// that are empty after removing the comment are skipped. Inside an
/// instruction every char other than `0` and `1` is ignored, so arbitrary
/// separators may be used to make a program easier to read.
///
/// An instruction may be pinned to an address by prefixing it with the
/// five-bit binary address followed by `:`. An instruction without such a
/// prefix is stored at the address directly after the previous instruction
/// (or at address 0 if it is the first one). Addresses have to be strictly
/// increasing over the course of the program. Every address that the program
/// leaves out is filled with an instruction that loops to itself.
///
/// # Examples
///
/// ```text
/// # Read value from FC into register 0
///
///        00,00001 00 000|1100 01 01,1100 0
/// 00001: 00,00000 01 000|0000 01 10,0001 0
/// ```
pub fn read_program<R: Read>(reader: R) -> Result<[Instruction; 32]> {
    let parsed = parse_instructions(reader)?;

    let mut program = [Instruction::default(); 32];

    for (address, (slot, given)) in program.iter_mut().zip(parsed.iter()).enumerate() {
        // The self-looping fallback is built for every address, whether or
        // not the program supplies an instruction there.
        let fallback = Instruction::new_looping(address).unwrap();
        *slot = match *given {
            Some(instruction) => instruction,
            None => fallback,
        };
    }

    Ok(program)
}

/// Build an iterator that lives on the stack and yields either one or two
/// items.
///
/// Both arms expand to the same concrete iterator type (a chain of two
/// `Option` iterators), so the one-item and the two-item form can be used as
/// the two branches of a single `if`/`else` expression.
///
/// Every argument expression is evaluated exactly once, and the items are
/// not required to be `Copy`.
macro_rules! alternative_2 {
    // One item: the second half of the chain is an empty `Option`.
    ($first:expr) => (
        Some($first).into_iter().chain(None)
    );
    // Two items: each half of the chain carries one of them.
    ($first:expr, $second:expr) => (
        Some($first).into_iter().chain(Some($second))
    );
}

/// Parse 2i programs in string representation and return only the reachable
/// instructions.
///
/// Instructions are considered reachable if there is a chain of instructions
/// starting from the first one at address 0 to it. This also considers
/// conditional jumps.
///
/// For details on the syntax of the string representation see `read_program`.
pub fn read_reachable_program<R: Read>(reader: R) -> Result<Vec<(u8, Instruction)>> {
    #[derive(Clone, Copy)]
    enum S {
        Empty, // Not yet visited
        Visited, // Visited, but instruction is missing (will get default one)
        Instruction(Instruction), // Visited and containing a instruction
    }

    let instructions = parse_instructions(reader)?;
    let mut reachable_instructions = [S::Empty; 32];

    // The instruction at address 0 is reachable by definition if it exists
    reachable_instructions[0] = if let Some(inst) = instructions[0] {
        S::Instruction(inst)
    } else {
        return Err(Error::Parse("No instruction reachable"));
    };

    // Since instructions can jump to earlier addresses, we have to iterate
    // until no new instruction is found.
    let mut finished = false;
    while !finished {
        finished = true;

        for i in 0..reachable_instructions.len() {
            if let S::Instruction(inst) = reachable_instructions[i] {
                let na = inst.get_next_instruction_address();

                // Consider both target addresses for conditional jumps
                let target_addresses = if inst.get_address_control() == 0 {
                    alternative_2!(na)
                } else {
                    alternative_2!(na & !1u8, na | 1u8)
                };

                for addr in target_addresses {
                    let addr = addr as usize;
                    // Only update instruction addresses that were not yet
                    // visited. This ensures that the algorithm terminates
                    if let S::Empty = reachable_instructions[addr] {
                        finished = false;
                        if let Some(inst) = instructions[addr] {
                            reachable_instructions[addr] = S::Instruction(inst);
                        } else {
                            reachable_instructions[addr] = S::Visited;
                        }
                    }
                }
            }
        }
    }

    // Addresses which were visited but did not have a valid instruction get
    // a default one (NOP, JMP 0)
    Ok(reachable_instructions.iter().enumerate().filter_map(|(i,inst)| {
        match *inst {
            S::Empty => None,
            S::Visited => Some((i as u8, Instruction::new_looping(i).unwrap())),
            S::Instruction(inst) => Some((i as u8, inst)),
        }
    }).collect())
}

/// Actually parse the instructions from the given reader
///
/// Returns a table indexed by address; addresses for which the program gives
/// no instruction are `None`.
///
/// Every line is stripped of its `#` comment and of surrounding whitespace,
/// and lines that are empty afterwards are skipped. A line containing `:`
/// must start with a five-bit binary address followed by `:`. In the
/// instruction part every char other than `0` and `1` is ignored.
///
/// For details on the syntax of the string representation see `read_program`.
///
/// # Errors
///
/// Errors from reading lines and from `Instruction::new` are propagated.
/// `Error::Parse` is returned when
///
/// * a line contains `:` but does not start with a valid address,
/// * an explicit address is already occupied by an earlier instruction,
/// * an explicit address is lower than the next free address,
/// * an instruction without explicit address would be placed past address 31.
fn parse_instructions<R: Read>(reader: R) -> Result<[Option<Instruction>; 32]> {
    let mut instructions: [Option<Instruction>; 32] = [None; 32];
    // Address directly after the most recently stored instruction. It is the
    // lowest address an explicit address may use and the address assigned to
    // an instruction without explicit address.
    let mut min_address = 0usize;
    let explicit_address = Regex::new(r"^(?P<addr>[01]{5})\s*:\s*(?P<inst>.*)$")
        .expect("the hard-coded address pattern is a valid regex");

    let reader = BufReader::new(reader);
    for line in reader.lines() {
        let line = line?;

        // Remove whitespace and comments that start with #
        let line = match line.find('#') {
            Some(start) => line[..start].trim(),
            None => line.trim(),
        };

        // Ignore empty lines
        if line.is_empty() {
            continue;
        }

        // Check if an explicit address is given
        let (instruction, address) = if line.contains(':') {
            match explicit_address.captures(line) {
                Some(matches) => {
                    let inst = matches.name("inst").unwrap().as_str();
                    let addr = matches.name("addr").unwrap().as_str();
                    (inst, Some(addr))
                }
                None => return Err(Error::Parse("Invalid instruction address")),
            }
        } else {
            (line, None)
        };

        // Parse the instruction before looking at the address, so that an
        // invalid instruction is reported in preference to an address error.
        let raw_inst = convert_binary_string_to_int(instruction);
        let instruction = Instruction::new(raw_inst)?;

        let address = match address {
            Some(address) => {
                // Parse specified address
                let address = convert_binary_string_to_int(address) as usize;

                // The address pattern only admits five bits, so this cannot
                // trigger today; it guards the indexing below should the
                // pattern ever be relaxed.
                if address >= 32 {
                    return Err(Error::Parse("Specified instruction address too big"));
                }

                // Check for a duplicate before checking the order: every
                // occupied slot lies below `min_address`, so the order check
                // would otherwise always win and hide this more specific
                // error.
                if instructions[address].is_some() {
                    return Err(Error::Parse("Two instructions with the same address"));
                }

                if address < min_address {
                    return Err(Error::Parse("Addresses must be nondecreasing"));
                }

                address
            }
            None => {
                // Use the min_address when not given explicitly
                if min_address >= 32 {
                    return Err(Error::Parse("Too many instructions in this program"));
                }

                min_address
            }
        };

        instructions[address] = Some(instruction);
        min_address = address + 1;
    }

    Ok(instructions)
}

/// Read the `0` and `1` chars of `s` as a binary number, most significant
/// bit first, skipping every other char.
///
/// Only the last 32 bits are kept: when `s` contains more than 32 valid
/// bits, the leading ones are shifted out and lost. A string without any
/// valid bit yields `0`.
fn convert_binary_string_to_int(s: &str) -> u32 {
    s.chars().fold(0u32, |value, c| match c {
        '0' => value << 1,
        '1' => (value << 1) | 1,
        // Formatting chars do not contribute a bit
        _ => value,
    })
}

// Unit tests for the parser and for both public entry points.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// A program mixing comments, blank lines, explicit and implicit
    /// addresses and formatting chars is parsed into the expected
    /// instructions, in address order.
    #[test]
    fn parser() {
        let program = parse_instructions(Cursor::new("\
            # Simple program\n\
            \n\
            00000: 00 00001 000000000000000000 # first instruction\n\
          \n       00 00011 000000000000000000# second instruction\n\
            00011: 00 11111 000000000000000000\n\
          \n       00 00000 000000000000000000\n\
            11111 : 00 00011 | 00 | 000 1111 01 | 01 0100 | 0\n\
        ".to_owned())).unwrap();

        // Drop the empty slots and compare only the instructions that were given
        assert_eq!(program.iter().filter_map(|e| *e).collect::<Vec<_>>().as_slice(), &[
            Instruction::new(0b00_00001_000000000000000000).unwrap(),
            Instruction::new(0b00_00011_000000000000000000).unwrap(),
            Instruction::new(0b00_11111_000000000000000000).unwrap(),
            Instruction::new(0b00_00000_000000000000000000).unwrap(),
            Instruction::new(0b00_00011_000001111010101000).unwrap(),
        ]);
    }

    /// The bits of an address must be written without separators; an address
    /// with spaces between its bits is rejected.
    #[test]
    #[should_panic(expected = "Invalid instruction address")]
    fn invalid_address() {
        let _ = parse_instructions(Cursor::new("\
            0 0 0 0 0: 00 00001 000000000000000000\n\
        ".to_owned())).unwrap();
    }

    /// An explicit address lower than the preceding one is rejected.
    #[test]
    #[should_panic(expected = "Addresses must be nondecreasing")]
    fn decreasing_address() {
        let _ = parse_instructions(Cursor::new("\
            00001: 00 00000 000000000000000000\n\
            00000: 00 00001 000000000000000000\n\
        ".to_owned())).unwrap();
    }

    /// An instruction without explicit address that follows address 11111
    /// would be placed past the last address and is rejected.
    #[test]
    #[should_panic(expected = "Too many instructions in this program")]
    fn overflowing_address() {
        let _ = parse_instructions(Cursor::new("\
            11111: 00 00000 000000000000000000\n\
                   00 00000 000000000000000000\n\
        ".to_owned())).unwrap();
    }

    /// Addresses without an instruction are filled with self-looping
    /// instructions, both by `read_reachable_program` (for reachable
    /// addresses only) and by `read_program` (for all addresses).
    #[test]
    fn fill_with_looping() {
        // Instruction 0 jumps to address 1, which the program does not define
        let program = Cursor::new("\
            00000: 00 00001 000000000000000000\n\
        ".to_owned());
        assert_eq!(read_reachable_program(program).unwrap().as_slice(), &[
            (0, Instruction::new(0b00_00001_000000000000000000).unwrap()),
            (1, Instruction::new(0b00_00001_000000000000000000).unwrap()),
        ]);

        // Only address 2 is given; all other addresses loop to themselves
        let program = "\
            00010: 00 00000 000000000000000000\n\
        ".to_owned();
        assert_eq!(&read_program(Cursor::new(&program)).unwrap()[..4], &[
            (Instruction::new(0b00_00000_000000000000000000).unwrap()),
            (Instruction::new(0b00_00001_000000000000000000).unwrap()),
            (Instruction::new(0b00_00000_000000000000000000).unwrap()),
            (Instruction::new(0b00_00011_000000000000000000).unwrap()),
        ]);
        assert_eq!(&read_program(Cursor::new(&program)).unwrap()[30..], &[
            (Instruction::new(0b00_11110_000000000000000000).unwrap()),
            (Instruction::new(0b00_11111_000000000000000000).unwrap()),
        ]);
    }

    /// Reachability is found across jumps to lower addresses; the expected
    /// result contains every instruction of the program.
    #[test]
    fn reachable_backjump() {
        let program = Cursor::new("\
            00000: 00 00100 000000000000000000\n\
            00001: 00 11111 000000000000000000\n\
            00010: 00 00001 000000000000000000\n\
            00100: 00 00010 000000000000000000\n\
            11111: 00 00000 000000000000000000\n\
        ".to_owned());
        assert_eq!(read_reachable_program(program).unwrap().as_slice(), &[
            ( 0, Instruction::new(0b00_00100_000000000000000000).unwrap()),
            ( 1, Instruction::new(0b00_11111_000000000000000000).unwrap()),
            ( 2, Instruction::new(0b00_00001_000000000000000000).unwrap()),
            ( 4, Instruction::new(0b00_00010_000000000000000000).unwrap()),
            (31, Instruction::new(0b00_00000_000000000000000000).unwrap()),
        ]);
    }

    /// An instruction with non-zero address control makes both addresses that
    /// differ only in the lowest bit (here 2 and 3) reachable.
    #[test]
    fn reachable_address_control() {
        let program = Cursor::new("\
            00000: 11 00010 000000000000000000\n\
            00010: 00 00000 000000000000000000\n\
            00011: 00 00000 000000000000000000\n\
        ".to_owned());
        assert_eq!(read_reachable_program(program).unwrap().as_slice(), &[
            (0, Instruction::new(0b11_00010_000000000000000000).unwrap()),
            (2, Instruction::new(0b00_00000_000000000000000000).unwrap()),
            (3, Instruction::new(0b00_00000_000000000000000000).unwrap()),
        ]);
    }

    /// An empty program has no instruction at address 0, so nothing is
    /// reachable and an error is returned.
    #[test]
    #[should_panic(expected = "No instruction reachable")]
    fn reachable_empty() {
        let program = Cursor::new("".to_owned());
        read_reachable_program(program).unwrap();
    }
}