Commit baf6f14d authored by Dave Hylands's avatar Dave Hylands
Browse files

Enhance str.format support

This adds support for almost everything (the comma isn't currently
supported).

The "unspecified" type with floats also doesn't behave exactly like
python.

Tested under unix with float and double
Spot tested on stmhal
parent e44d26ae
......@@ -14,6 +14,7 @@
***********************************************************************/
#include <stdlib.h>
#include <stdint.h>
#include "mpconfig.h"
......
......@@ -198,6 +198,10 @@ machine_int_t mp_obj_get_int(mp_obj_t arg) {
return MP_OBJ_SMALL_INT_VALUE(arg);
} else if (MP_OBJ_IS_TYPE(arg, &mp_type_int)) {
return mp_obj_int_get_checked(arg);
#if MICROPY_ENABLE_FLOAT
} else if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
return mp_obj_float_get(arg);
#endif
} else {
nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "can't convert %s to int", mp_obj_get_type_str(arg)));
}
......
......@@ -9,6 +9,7 @@
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
#include "pfenv.h"
typedef struct _mp_obj_str_t {
mp_obj_base_t base;
......@@ -492,28 +493,389 @@ STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false);
}
// Takes an int arg, but only parses unsigned numbers, and only changes
// *num if at least one digit was parsed.
static int str_to_int(const char *str, int *num) {
const char *s = str;
if (unichar_isdigit(*s)) {
*num = 0;
do {
*num = *num * 10 + (*s - '0');
s++;
}
while (unichar_isdigit(*s));
}
return s - str;
}
static bool isalignment(char ch) {
return ch && strchr("<>=^", ch) != NULL;
}
static bool istype(char ch) {
return ch && strchr("bcdeEfFgGnosxX%", ch) != NULL;
}
static bool arg_looks_integer(mp_obj_t arg) {
return MP_OBJ_IS_TYPE(arg, &mp_type_bool) || MP_OBJ_IS_INT(arg);
}
static bool arg_looks_numeric(mp_obj_t arg) {
return arg_looks_integer(arg)
#if MICROPY_ENABLE_FLOAT
|| MP_OBJ_IS_TYPE(arg, &mp_type_float)
#endif
;
}
mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
assert(MP_OBJ_IS_STR(args[0]));
GET_STR_DATA_LEN(args[0], str, len);
int arg_i = 1;
int arg_i = 0;
vstr_t *vstr = vstr_new();
pfenv_t pfenv_vstr;
pfenv_vstr.data = vstr;
pfenv_vstr.print_strn = pfenv_vstr_add_strn;
for (const byte *top = str + len; str < top; str++) {
if (*str == '{') {
if (*str == '}') {
str++;
if (str < top && *str == '{') {
vstr_add_char(vstr, '{');
if (str < top && *str == '}') {
vstr_add_char(vstr, '}');
continue;
}
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "Single '}' encountered in format string"));
}
if (*str != '{') {
vstr_add_char(vstr, *str);
continue;
}
str++;
if (str < top && *str == '{') {
vstr_add_char(vstr, '{');
continue;
}
// replacement_field ::= "{" [field_name] ["!" conversion] [":" format_spec] "}"
vstr_t *field_name = NULL;
char conversion = '\0';
vstr_t *format_spec = NULL;
if (str < top && *str != '}' && *str != '!' && *str != ':') {
field_name = vstr_new();
while (str < top && *str != '}' && *str != '!' && *str != ':') {
vstr_add_char(field_name, *str++);
}
vstr_add_char(field_name, '\0');
}
// conversion ::= "r" | "s"
if (str < top && *str == '!') {
str++;
if (str < top && (*str == 'r' || *str == 's')) {
conversion = *str++;
} else {
while (str < top && *str != '}') str++;
if (arg_i >= n_args) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "end of format while looking for conversion specifier"));
}
}
if (str < top && *str == ':') {
str++;
// {:} is the same as {}, which is the same as {!s}
// This makes a difference when passing in a True or False
// '{}'.format(True) returns 'True'
// '{:d}'.format(True) returns '1'
// So we treat {:} as {} and this later gets treated to be {!s}
if (*str != '}') {
format_spec = vstr_new();
while (str < top && *str != '}') {
vstr_add_char(format_spec, *str++);
}
// TODO: may be PRINT_REPR depending on formatting code
mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[arg_i], PRINT_STR);
arg_i++;
vstr_add_char(format_spec, '\0');
}
}
if (str >= top) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "unmatched '{' in format"));
}
if (*str != '}') {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "expected ':' after format specifier"));
}
mp_obj_t arg = mp_const_none;
if (field_name) {
if (arg_i > 0) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from automatic field numbering to manual field specification"));
}
int index;
if (str_to_int(vstr_str(field_name), &index) != vstr_len(field_name) - 1) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "attributes not supported yet"));
}
if (index >= n_args - 1) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
}
arg = args[index + 1];
arg_i = -1;
vstr_free(field_name);
field_name = NULL;
} else {
vstr_add_char(vstr, *str);
if (arg_i < 0) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from manual field specification to automatic field numbering"));
}
if (arg_i >= n_args - 1) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
}
arg = args[arg_i + 1];
arg_i++;
}
if (!format_spec && !conversion) {
conversion = 's';
}
if (conversion) {
mp_print_kind_t print_kind;
if (conversion == 's') {
print_kind = PRINT_STR;
} else if (conversion == 'r') {
print_kind = PRINT_REPR;
} else {
nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "Unknown conversion specifier %c", conversion));
}
vstr_t *arg_vstr = vstr_new();
mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, arg_vstr, arg, print_kind);
arg = mp_obj_new_str((const byte *)vstr_str(arg_vstr), vstr_len(arg_vstr), false);
vstr_free(arg_vstr);
}
char sign = '\0';
char fill = '\0';
char align = '\0';
int width = -1;
int precision = -1;
char type = '\0';
int flags = 0;
if (format_spec) {
// The format specifier (from http://docs.python.org/2/library/string.html#formatspec)
//
// [[fill]align][sign][#][0][width][,][.precision][type]
// fill ::= <any character>
// align ::= "<" | ">" | "=" | "^"
// sign ::= "+" | "-" | " "
// width ::= integer
// precision ::= integer
// type ::= "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"
const char *s = vstr_str(format_spec);
if (isalignment(*s)) {
align = *s++;
} else if (*s && isalignment(s[1])) {
fill = *s++;
align = *s++;
}
if (*s == '+' || *s == '-' || *s == ' ') {
if (*s == '+') {
flags |= PF_FLAG_SHOW_SIGN;
} else if (*s == ' ') {
flags |= PF_FLAG_SPACE_SIGN;
}
sign = *s++;
}
if (*s == '#') {
flags |= PF_FLAG_SHOW_PREFIX;
s++;
}
if (*s == '0') {
if (!align) {
align = '=';
}
if (!fill) {
fill = '0';
}
}
s += str_to_int(s, &width);
if (*s == ',') {
flags |= PF_FLAG_SHOW_COMMA;
s++;
}
if (*s == '.') {
s++;
s += str_to_int(s, &precision);
}
if (istype(*s)) {
type = *s++;
}
if (*s) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "Invalid conversion specification"));
}
vstr_free(format_spec);
format_spec = NULL;
}
if (!align) {
if (arg_looks_numeric(arg)) {
align = '>';
} else {
align = '<';
}
}
if (!fill) {
fill = ' ';
}
if (sign) {
if (type == 's') {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed in string format specifier"));
}
if (type == 'c') {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed with integer format specifier 'c'"));
}
} else {
sign = '-';
}
switch (align) {
case '<': flags |= PF_FLAG_LEFT_ADJUST; break;
case '=': flags |= PF_FLAG_PAD_AFTER_SIGN; break;
case '^': flags |= PF_FLAG_CENTER_ADJUST; break;
}
if (arg_looks_integer(arg)) {
switch (type) {
case 'b':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 2, 'a', flags, fill, width);
continue;
case 'c':
{
char ch = mp_obj_get_int(arg);
pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, fill, width);
continue;
}
case '\0': // No explicit format type implies 'd'
case 'n': // I don't think we support locales in uPy so use 'd'
case 'd':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 10, 'a', flags, fill, width);
continue;
case 'o':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 8, 'a', flags, fill, width);
continue;
case 'x':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'a', flags, fill, width);
continue;
case 'X':
pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'A', flags, fill, width);
continue;
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
case '%':
// The floating point formatters all work with anything that
// looks like an integer
break;
default:
nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
"Unknown format code '%c' for object of type '%s'", type, mp_obj_get_type_str(arg)));
}
}
#if MICROPY_ENABLE_FLOAT
if (arg_looks_numeric(arg)) {
if (!type) {
// Even though the docs say that an unspecified type is the same
// as 'g', there is one subtle difference, when the exponent
// is one less than the precision.
//
// '{:10.1}'.format(0.0) ==> '0e+00'
// '{:10.1g}'.format(0.0) ==> '0'
//
// TODO: Figure out how to deal with this.
//
// A proper solution would involve adding a special flag
// or something to format_float, and create a format_double
// to deal with doubles. In order to fix this when using
// sprintf, we'd need to use the e format and tweak the
// returned result to strip trailing zeros like the g format
// does.
//
// {:10.3} and {:10.2e} with 1.23e2 both produce 1.23e+02
// but with 1.e2 you get 1e+02 and 1.00e+02
//
// Stripping the trailing 0's (like g) does would make the
// e format give us the right format.
//
// CPython sources say:
// Omitted type specifier. Behaves in the same way as repr(x)
// and str(x) if no precision is given, else like 'g', but with
// at least one digit after the decimal point. */
type = 'g';
}
if (type == 'n') {
type = 'g';
}
flags |= PF_FLAG_PAD_NAN_INF; // '{:06e}'.format(float('-inf')) should give '-00inf'
switch (type) {
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), type, flags, fill, width, precision);
break;
case '%':
flags |= PF_FLAG_ADD_PERCENT;
pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg) * 100.0F, 'f', flags, fill, width, precision);
break;
default:
nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
"Unknown format code '%c' for object of type 'float'",
type, mp_obj_get_type_str(arg)));
}
#endif
} else {
if (align == '=') {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "'=' alignment not allowed in string format specifier"));
}
switch (type) {
case '\0':
mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, arg, PRINT_STR);
break;
case 's':
{
uint len;
const char *s = mp_obj_str_get_data(arg, &len);
if (precision < 0) {
precision = len;
}
if (len > precision) {
len = precision;
}
pfenv_print_strn(&pfenv_vstr, s, len, flags, fill, width);
break;
}
default:
nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
"Unknown format code '%c' for object of type 'str'",
type, mp_obj_get_type_str(arg)));
}
}
}
......
#include <stdint.h>
#include <string.h>
///#include "std.h"
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
#include "pfenv.h"
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
#include <stdio.h>
#endif
#if MICROPY_ENABLE_FLOAT
#include "formatfloat.h"
#endif
#define PF_PAD_SIZE 16
static const char *pad_spaces = " ";
static const char *pad_zeroes = "0000000000000000";
void pfenv_vstr_add_strn(void *data, const char *str, unsigned int len){
vstr_add_strn(data, str, len);
}
int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, int flags, char fill, int width) {
int left_pad = 0;
int right_pad = 0;
int pad = width - len;
char pad_fill[PF_PAD_SIZE];
const char *pad_chars;
if (!fill || fill == ' ' ) {
pad_chars = pad_spaces;
} else if (fill == '0') {
pad_chars = pad_zeroes;
} else {
memset(pad_fill, fill, PF_PAD_SIZE);
pad_chars = pad_fill;
}
if (flags & PF_FLAG_CENTER_ADJUST) {
left_pad = pad / 2;
right_pad = pad - left_pad;
} else if (flags & PF_FLAG_LEFT_ADJUST) {
right_pad = pad;
} else {
left_pad = pad;
}
if (left_pad) {
while (left_pad > 0) {
int p = left_pad;
if (p > PF_PAD_SIZE)
p = PF_PAD_SIZE;
pfenv->print_strn(pfenv->data, pad_chars, p);
left_pad -= p;
}
}
pfenv->print_strn(pfenv->data, str, len);
if (right_pad) {
while (right_pad > 0) {
int p = right_pad;
if (p > PF_PAD_SIZE)
p = PF_PAD_SIZE;
pfenv->print_strn(pfenv->data, pad_chars, p);
right_pad -= p;
}
}
return len;
}
// enough room for 32 signed number
#define INT_BUF_SIZE (16)
int pfenv_print_int(const pfenv_t *pfenv, unsigned int x, int sgn, int base, int base_char, int flags, char fill, int width) {
char sign = 0;
if (sgn) {
if ((int)x < 0) {
sign = '-';
x = -x;
} else if (flags & PF_FLAG_SHOW_SIGN) {
sign = '+';
} else if (flags & PF_FLAG_SPACE_SIGN) {
sign = ' ';
}
}
char buf[INT_BUF_SIZE];
char *b = buf + INT_BUF_SIZE;
if (x == 0) {
*(--b) = '0';
} else {
do {
int c = x % base;
x /= base;
if (c >= 10) {
c += base_char - 10;
} else {
c += '0';
}
*(--b) = c;
} while (b > buf && x != 0);
}
char prefix_char = '\0';
if (flags & PF_FLAG_SHOW_PREFIX) {
if (base == 2) {
prefix_char = base_char + 'b' - 'a';
} else if (base == 8) {
prefix_char = base_char + 'o' - 'a';
} else if (base == 16) {
prefix_char = base_char + 'x' - 'a';
}
}
int len = 0;
if (flags & PF_FLAG_PAD_AFTER_SIGN) {
if (sign) {
len += pfenv_print_strn(pfenv, &sign, 1, flags, fill, 1);
width--;
}
if (prefix_char) {
len += pfenv_print_strn(pfenv, "0", 1, flags, fill, 1);
len += pfenv_print_strn(pfenv, &prefix_char, 1, flags, fill, 1);
width -= 2;
}
} else {
if (prefix_char && b > &buf[1]) {
*(--b) = prefix_char;
*(--b) = '0';
}
if (sign && b > buf) {
*(--b) = sign;
}
}
len += pfenv_print_strn(pfenv, b, buf + INT_BUF_SIZE - b, flags, fill, width);
return len;
}
#if MICROPY_ENABLE_FLOAT
int pfenv_print_float(const pfenv_t *pfenv, mp_float_t f, char fmt, int flags, char fill, int width, int prec) {
char buf[32];
char sign = '\0';
int chrs = 0;
if (flags & PF_FLAG_SHOW_SIGN) {
sign = '+';
}
else
if (flags & PF_FLAG_SPACE_SIGN) {
sign = ' ';
}
int len;
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
len = format_float(f, buf, sizeof(buf), fmt, prec, sign);
#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
char fmt_buf[6];
char *fmt_s = fmt_buf;
*fmt_s++ = '%';
if (sign) {
*fmt_s++ = sign;
}
*fmt_s++ = '.';
*fmt_s++ = '*';
*fmt_s++ = fmt;
*fmt_s = '\0';
len = snprintf(buf, sizeof(buf), fmt_buf, prec, f);
#else
#error Unknown MICROPY FLOAT IMPL
#endif
char *s = buf;
if ((flags & PF_FLAG_ADD_PERCENT) && (len + 1) < sizeof(buf)) {
buf[len++] = '%';
buf[len] = '\0';
}
// buf[0] < '0' returns true if the first character is space, + or -
if ((flags & PF_FLAG_PAD_AFTER_SIGN) && buf[0] < '0') {
// We have a sign character
s++;
if (*s <= '9' || (flags & PF_FLAG_PAD_NAN_INF)) {
// We have a number, or we have a inf/nan and PAD_NAN_INF is set